## Load libraries
library(covid19)
library(ggplot2)
library(lubridate)
library(dplyr)
library(ggplot2)
library(sp)
library(raster)
library(viridis)
library(ggthemes)
library(sf)
library(rnaturalearth)
library(rnaturalearthdata)
library(RColorBrewer)
library(readr)
library(zoo)
library(tidyr)
options(scipen = '999')
# Number of Africa countries
pd <- df_country
pd <- pd %>% left_join(world_pop) %>%
filter(sub_region %in% c('Sub-Saharan Africa')) %>%
filter(cases > 0)
pd <- pd %>% group_by(date) %>% tally
ggplot(data = pd,
aes(x = date,
y = n)) +
# geom_line() +
geom_area(fill = 'darkorange',
color = 'black',
alpha = 0.6) +
theme_simple() +
labs(x = 'Date',
y = 'Countries',
title = 'Sub-Saharan African countries with confirmed COVID-19 cases')
# Overall Africa cases
pd <- df_country
pd <- pd %>% left_join(world_pop) %>%
filter(sub_region %in% c('Sub-Saharan Africa'))
x = pd %>%
group_by(date) %>%
summarise(cases = sum(cases),
deaths = sum(deaths),
n = length(country[cases > 0]))
x %>% filter(date == '2020-04-07' | date == max(date) | date == '2020-03-13')
# A tibble: 3 x 4
date cases deaths n
<date> <dbl> <dbl> <int>
1 2020-03-13 45 0 10
2 2020-04-07 5634 106 42
3 2020-04-30 22454 505 43
# Tests
pd <- testing
entity_split <- strsplit(pd$Entity, split = ' - ')
pd$country <- unlist(lapply(entity_split, function(x){x[1]}))
pd$key <- unlist(lapply(entity_split, function(x){x[2]}))
# pd <- pd %>% filter(key == 'tests performed')
pd <- pd %>% left_join(world_pop)# %>%
# filter(sub_region %in% c('Sub-Saharan Africa', 'Southern Europe', 'Northern Europe', 'Western Europe'))
ggplot(data = pd,
aes(x = Date,
y = `Cumulative total`,
group = country)) +
geom_line(aes(color = country))
x = pd %>% group_by(country) %>%
filter(Date == max(Date)) %>%
ungroup %>%
mutate(sub_region = ifelse(sub_region == 'Sub-Saharan Africa',
'SSA', 'Other')) %>%
filter(!is.na(sub_region)) %>%
arrange(`Cumulative total`)
x$sub_region <- factor(x$sub_region, levels = unique(x$sub_region))
x$country <- factor(x$country, levels = unique(x$country))
ggplot(data = x,
aes(x = country,
y = `Cumulative total`)) +
geom_bar(aes(fill = sub_region), stat = 'identity') +
theme_simple() +
scale_fill_manual(name = '',
values = c('grey', 'darkorange')) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(x= 'Country',
title = 'Tests administered')
pd = esp_df %>%
left_join(esp_pop) %>%
mutate(p = deaths_non_cum / pop * 1000000)
ggplot(data = pd,
aes(x = date,
y = p)) +
# geom_step() +
geom_bar(stat = 'identity',
fill = 'red',
alpha = 0.6,
color = NA) +
# geom_ribbon(aes(x = date, ymin = 0, ymax = p), data = pd, fill = 'blue') +
facet_wrap(~ccaa) +
theme_minimal() +
labs(x = 'Date',
y = 'Daily deaths per 1,000,000',
title = 'Daily deaths per 1,000,000 population')
pd <- df_country %>%
filter(country == 'Spain')
ggplot(data = pd,
aes(x = date,
y = cases_non_cum)) +
geom_bar(stat = 'identity') +
theme_simple() +
labs(x = 'Fecha',
y = 'Casos diarios',
title = 'Casos confirmados nuevos')
pd <- df_country %>%
filter(country == 'Spain')
ggplot(data = pd,
aes(x = date,
y = deaths_non_cum)) +
geom_bar(stat = 'identity') +
theme_simple() +
labs(x = 'Fecha',
y = 'Muertes diarias',
title = 'Muertes')
covid19::plot_day_zero(countries = c('Italy', 'Spain', 'US', 'Germany',
'Canada', 'UK', 'Netherlands'
),
ylog = F,
day0 = 1,
cumulative = F,
calendar = T,
pop = T,
point_alpha = 0,
color_var = 'geo')
pd <- df_country %>%
left_join(world_pop %>% dplyr::select(iso, pop)) %>%
group_by(country) %>%
mutate(max_date = max(date)) %>%
mutate(week_ago = max_date - 6) %>%
# filter(date == max(date)) %>%
filter(date >= week_ago, date <= max_date) %>%
group_by(country) %>%
summarise(y = sum(cases_non_cum),
pop = dplyr::first(pop),
date_range = paste0(min(date), ' - ', max(date)),
yp = sum(cases_non_cum) / dplyr::first(pop) * 1000000) %>%
ungroup %>%
filter(pop > 1000000) %>%
arrange(desc(yp)) %>%
head(15) %>%
mutate(country = ifelse(country == 'United Kingdom', 'UK', country))
pd$country <- factor(pd$country, levels = unique(pd$country))
ggplot(data = pd,
aes(x = country,
y = yp)) +
geom_bar(stat = 'identity') +
theme_simple() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 12)) +
geom_text(aes(label = round(yp, digits = 0)),
nudge_y = -50,
color = 'white') +
labs(x = '',
y = 'Cases per 1,000,000 (last 7 days)',
title = 'New confirmed COVID-19 cases per million population, last 7 days')
pd <- df_country %>%
left_join(world_pop %>% dplyr::select(iso, pop)) %>%
group_by(country) %>%
mutate(max_date = max(date)) %>%
mutate(week_ago = max_date - 6) %>%
# filter(date == max(date)) %>%
filter(date >= week_ago, date <= max_date) %>%
group_by(country) %>%
summarise(y = sum(deaths_non_cum),
pop = dplyr::first(pop),
date_range = paste0(min(date), ' - ', max(date)),
yp = sum(deaths_non_cum) / dplyr::first(pop) * 1000000) %>%
ungroup %>%
filter(pop > 1000000) %>%
arrange(desc(yp)) %>%
head(10) %>%
mutate(country = ifelse(country == 'United Kingdom', 'UK', country))
pd$country <- factor(pd$country, levels = unique(pd$country))
ggplot(data = pd,
aes(x = country,
y = yp)) +
geom_bar(stat = 'identity') +
theme_simple() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 12)) +
geom_text(aes(label = round(yp, digits = 0)),
nudge_y = -10,
color = 'white') +
labs(x = '',
y = 'Deaths per 1,000,000 (last 7 days)',
title = 'New confirmed COVID-19 deaths per million population, last 7 days')
pd <- esp_df %>%
left_join(esp_pop %>% dplyr::select(ccaa, pop)) %>%
mutate(country = 'Spain') %>%
bind_rows(
ita %>% left_join(ita_pop %>% dplyr::select(ccaa, pop)) %>% mutate(country = 'Italy')
) %>%
bind_rows(
df %>% filter(country == 'US') %>% mutate(ccaa = district) %>% left_join(regions_pop %>% dplyr::select(ccaa, pop)) %>% mutate(country = 'US')) %>%
group_by(ccaa) %>%
mutate(max_date = max(date)) %>%
mutate(week_ago = max_date - 6) %>%
# filter(date == max(date)) %>%
filter(date >= week_ago, date <= max_date) %>%
group_by(ccaa) %>%
summarise(y = sum(cases_non_cum),
country = dplyr::first(country),
pop = dplyr::first(pop),
date_range = paste0(min(date), ' - ', max(date))) %>%
ungroup %>%
mutate(yp = y / pop * 1000000) %>%
ungroup %>%
# filter(pop > 1000000) %>%
arrange(desc(yp))
#Get country totals
pd %>%
group_by(country) %>%
summarise(y = sum(y, na.rm = T),
pop = sum(pop, na.rm = T)) %>%
ungroup %>%
mutate(yp = y / pop * 1000000)
# A tibble: 3 x 4
country y pop yp
<chr> <dbl> <dbl> <dbl>
1 Italy 15490 60491453 256.
2 Spain 21573 47026208 459.
3 US 200265 328239523 610.
library(knitr)
library(kableExtra)
pd <- pd %>%
mutate(yp = round(yp, digits = 1)) %>%
mutate(Rank = 1:nrow(pd)) %>%
dplyr::select(Rank,
Región = ccaa,
`Casos nuevos, 7 días` = y,
Población = pop,
`Casos nuevos 7 días por millón` = yp)
kable(pd) %>%
kable_styling("striped", full_width = F) %>%
column_spec(which(names(pd) == 'Casos nuevos 7 días por millón'), bold = T) %>%
row_spec(which(pd$`Región` %in% esp_df$ccaa), bold = T, color = "white", background = "#D7261E")
| Rank | Región | Casos nuevos, 7 días | Población | Casos nuevos 7 días por millón |
|---|---|---|---|---|
| 1 | Massachusetts | 16182 | 6892503 | 2347.8 |
| 2 | Rhode Island | 2365 | 1059361 | 2232.5 |
| 3 | New York | 40912 | 19453561 | 2103.1 |
| 4 | New Jersey | 18627 | 8882190 | 2097.1 |
| 5 | Navarra | 1157 | 654214 | 1768.5 |
| 6 | Delaware | 1426 | 973764 | 1464.4 |
| 7 | District of Columbia | 962 | 705749 | 1363.1 |
| 8 | Connecticut | 4600 | 3565287 | 1290.2 |
| 9 | Illinois | 15981 | 12671821 | 1261.1 |
| 10 | CyL | 2963 | 2399548 | 1234.8 |
| 11 | CLM | 2463 | 2032863 | 1211.6 |
| 12 | La Rioja | 361 | 316798 | 1139.5 |
| 13 | Nebraska | 2079 | 1934408 | 1074.7 |
| 14 | Iowa | 3221 | 3155070 | 1020.9 |
| 15 | Maryland | 6005 | 6045680 | 993.3 |
| 16 | País Vasco | 2189 | 2207776 | 991.5 |
| 17 | Pennsylvania | 9592 | 12801989 | 749.3 |
| 18 | Piemonte | 3149 | 4376000 | 719.6 |
| 19 | Indiana | 4796 | 6732219 | 712.4 |
| 20 | Colorado | 4006 | 5758736 | 695.6 |
| 21 | Aragón | 808 | 1319291 | 612.5 |
| 22 | Michigan | 6083 | 9986857 | 609.1 |
| 23 | Liguria | 944 | 1557000 | 606.3 |
| 24 | Kansas | 1692 | 2913314 | 580.8 |
| 25 | Virginia | 4849 | 8535519 | 568.1 |
| 26 | Mississippi | 1662 | 2976149 | 558.4 |
| 27 | South Dakota | 493 | 884659 | 557.3 |
| 28 | Lombardia | 5567 | 10040000 | 554.5 |
| 29 | Cantabria | 303 | 581078 | 521.4 |
| 30 | New Mexico | 1032 | 2096829 | 492.2 |
| 31 | Cataluña | 3763 | 7675217 | 490.3 |
| 32 | Louisiana | 2262 | 4648794 | 486.6 |
| 33 | North Dakota | 358 | 762062 | 469.8 |
| 34 | Madrid | 3032 | 6663394 | 455.0 |
| 35 | Trentino-Alto Adige | 472 | 1070000 | 441.1 |
| 36 | Georgia | 4381 | 10617423 | 412.6 |
| 37 | Minnesota | 2194 | 5639632 | 389.0 |
| 38 | Emilia-Romagna | 1713 | 4453000 | 384.7 |
| 39 | Tennessee | 2469 | 6829174 | 361.5 |
| 40 | New Hampshire | 476 | 1359711 | 350.1 |
| 41 | Asturias | 344 | 1022800 | 336.3 |
| 42 | Utah | 1060 | 3205958 | 330.6 |
| 43 | Wisconsin | 1802 | 5822434 | 309.5 |
| 44 | Ceuta | 26 | 84777 | 306.7 |
| 45 | Galicia | 796 | 2699499 | 294.9 |
| 46 | Ohio | 3333 | 11689100 | 285.1 |
| 47 | North Carolina | 2934 | 10488084 | 279.7 |
| 48 | Kentucky | 1229 | 4467673 | 275.1 |
| 49 | Nevada | 845 | 3080156 | 274.3 |
| 50 | California | 10569 | 39512223 | 267.5 |
| 51 | C. Valenciana | 1303 | 5003769 | 260.4 |
| 52 | Arizona | 1883 | 7278717 | 258.7 |
| 53 | Alabama | 1256 | 4903185 | 256.2 |
| 54 | Valle d’Aosta | 32 | 126202 | 253.6 |
| 55 | South Carolina | 1178 | 5148714 | 228.8 |
| 56 | Arkansas | 682 | 3017804 | 226.0 |
| 57 | Veneto | 1079 | 4905000 | 220.0 |
| 58 | Missouri | 1312 | 6137428 | 213.8 |
| 59 | Extremadura | 227 | 1067710 | 212.6 |
| 60 | Texas | 6077 | 28995881 | 209.6 |
| 61 | Washington | 1574 | 7614893 | 206.7 |
| 62 | Marche | 295 | 1532000 | 192.6 |
| 63 | Florida | 4042 | 21477737 | 188.2 |
| 64 | Wyoming | 106 | 578759 | 183.2 |
| 65 | Andalucía | 1451 | 8414240 | 172.4 |
| 66 | Toscana | 572 | 3737000 | 153.1 |
| 67 | Oklahoma | 601 | 3956971 | 151.9 |
| 68 | Murcia | 219 | 1493898 | 146.6 |
| 69 | Friuli Venezia Giulia | 167 | 1216000 | 137.3 |
| 70 | Maine | 158 | 1344212 | 117.5 |
| 71 | Melilla | 10 | 86487 | 115.6 |
| 72 | Abruzzo | 145 | 1315000 | 110.3 |
| 73 | Lazio | 562 | 5897000 | 95.3 |
| 74 | Oregon | 383 | 4217737 | 90.8 |
| 75 | Idaho | 148 | 1787065 | 82.8 |
| 76 | West Virginia | 144 | 1792147 | 80.4 |
| 77 | Baleares | 92 | 1149460 | 80.0 |
| 78 | Vermont | 41 | 623989 | 65.7 |
| 79 | Puglia | 233 | 4048000 | 57.6 |
| 80 | Sicilia | 240 | 5027000 | 47.7 |
| 81 | Molise | 14 | 308493 | 45.4 |
| 82 | Umbria | 30 | 884640 | 33.9 |
| 83 | Campania | 185 | 5827000 | 31.7 |
| 84 | Canarias | 66 | 2153389 | 30.6 |
| 85 | Sardegna | 41 | 1648000 | 24.9 |
| 86 | Alaska | 18 | 731545 | 24.6 |
| 87 | Calabria | 39 | 1957000 | 19.9 |
| 88 | Basilicata | 11 | 567118 | 19.4 |
| 89 | Hawaii | 22 | 1415872 | 15.5 |
| 90 | Montana | 11 | 1068778 | 10.3 |
| 91 | American Samoa | 0 | NA | NA |
| 92 | Diamond Princess | 0 | NA | NA |
| 93 | Grand Princess | 0 | NA | NA |
| 94 | Guam | 6 | NA | NA |
| 95 | Northern Mariana Islands | 0 | NA | NA |
| 96 | Puerto Rico | 123 | NA | NA |
| 97 | Recovered | 0 | NA | NA |
| 98 | United States Virgin Islands | 6 | NA | NA |
| 99 | US | 1 | NA | NA |
| 100 | Virgin Islands | 12 | NA | NA |
| 101 | Wuhan Evacuee | 4 | NA | NA |
xpd = pd %>% filter(`Región` %in% c(ita$ccaa, esp_df$ccaa))
xpd$Rank <- 1:nrow(xpd)
kable(xpd) %>%
kable_styling("striped", full_width = F) %>%
column_spec(which(names(xpd) == 'Casos nuevos 7 días por millón'), bold = T) %>%
row_spec(which(xpd$`Región` %in% esp_df$ccaa), bold = T, color = "white", background = "#D7261E")
| Rank | Región | Casos nuevos, 7 días | Población | Casos nuevos 7 días por millón |
|---|---|---|---|---|
| 1 | Navarra | 1157 | 654214 | 1768.5 |
| 2 | CyL | 2963 | 2399548 | 1234.8 |
| 3 | CLM | 2463 | 2032863 | 1211.6 |
| 4 | La Rioja | 361 | 316798 | 1139.5 |
| 5 | País Vasco | 2189 | 2207776 | 991.5 |
| 6 | Piemonte | 3149 | 4376000 | 719.6 |
| 7 | Aragón | 808 | 1319291 | 612.5 |
| 8 | Liguria | 944 | 1557000 | 606.3 |
| 9 | Lombardia | 5567 | 10040000 | 554.5 |
| 10 | Cantabria | 303 | 581078 | 521.4 |
| 11 | Cataluña | 3763 | 7675217 | 490.3 |
| 12 | Madrid | 3032 | 6663394 | 455.0 |
| 13 | Trentino-Alto Adige | 472 | 1070000 | 441.1 |
| 14 | Emilia-Romagna | 1713 | 4453000 | 384.7 |
| 15 | Asturias | 344 | 1022800 | 336.3 |
| 16 | Ceuta | 26 | 84777 | 306.7 |
| 17 | Galicia | 796 | 2699499 | 294.9 |
| 18 | C. Valenciana | 1303 | 5003769 | 260.4 |
| 19 | Valle d’Aosta | 32 | 126202 | 253.6 |
| 20 | Veneto | 1079 | 4905000 | 220.0 |
| 21 | Extremadura | 227 | 1067710 | 212.6 |
| 22 | Marche | 295 | 1532000 | 192.6 |
| 23 | Andalucía | 1451 | 8414240 | 172.4 |
| 24 | Toscana | 572 | 3737000 | 153.1 |
| 25 | Murcia | 219 | 1493898 | 146.6 |
| 26 | Friuli Venezia Giulia | 167 | 1216000 | 137.3 |
| 27 | Melilla | 10 | 86487 | 115.6 |
| 28 | Abruzzo | 145 | 1315000 | 110.3 |
| 29 | Lazio | 562 | 5897000 | 95.3 |
| 30 | Baleares | 92 | 1149460 | 80.0 |
| 31 | Puglia | 233 | 4048000 | 57.6 |
| 32 | Sicilia | 240 | 5027000 | 47.7 |
| 33 | Molise | 14 | 308493 | 45.4 |
| 34 | Umbria | 30 | 884640 | 33.9 |
| 35 | Campania | 185 | 5827000 | 31.7 |
| 36 | Canarias | 66 | 2153389 | 30.6 |
| 37 | Sardegna | 41 | 1648000 | 24.9 |
| 38 | Calabria | 39 | 1957000 | 19.9 |
| 39 | Basilicata | 11 | 567118 | 19.4 |
x = esp_df %>% left_join(esp_pop) %>%
bind_rows(ita %>% left_join(ita_pop)) %>%
filter(date == '2020-04-09') %>%
filter(ccaa %in% c('Madrid',
'Cataluña', 'Lombardia')) %>%
dplyr::select(ccaa, deaths, cases, pop) %>%
mutate(deathsp = deaths / pop * 100000,
casesp = cases / pop * 100000)
covid19::plot_day_zero(countries = c('Italy', 'Spain', 'China', 'South Korea', 'Sinagpore'),
districts = c('Madrid', #'Hubei',
'Cataluña', 'Lombardia'),
by_district = T,
roll = 7,
deaths = F,
pop = T,
day0 = 0,
ylog = F,
calendar = T,
cumulative = F) +
labs(x = 'Data',
y = 'Casos diaris (mitjana mòbil de 7 dies)',
title = 'Casos diaris per 1.000.000',
subtitle = 'Mitjana mòbil de 7 dies')
covid19::plot_day_zero(countries = c('Italy', 'Spain'),
roll = 7,
deaths = F,
pop = T,
day0 = 0,
ylog = F,
calendar = T,
cumulative = F) +
labs(x = 'Data',
y = 'Casos diaris per 1.000.000 (mitjana mòbil de 7 dies)',
title = 'Casos diaris per 1.000.000',
subtitle = 'Mitjana mòbil de 7 dies') +
theme(legend.direction = 'horizontal',
legend.position = 'top')
covid19::plot_day_zero(countries = c('Italy', 'Spain'),
roll = 7,
deaths = T,
pop = T,
day0 = 0,
ylog = F,
calendar = T,
cumulative = F) +
labs(x = 'Data',
y = 'Morts diaris per 1.000.000 (mitjana mòbil de 7 dies)',
title = 'Morts diaris per 1.000.000',
subtitle = 'Mitjana mòbil de 7 dies') +
theme(legend.direction = 'horizontal',
legend.position = 'top')
covid19::plot_day_zero(countries = c('Italy', 'Spain'),
by_district = T,
districts = c('Madrid', 'Emilia-Romagna',
'Cataluña', 'Lombardia'),
roll = 7,
deaths = F,
pop = T,
day0 = 0,
ylog = F,
calendar = T,
cumulative = F) +
labs(x = 'Data',
y = 'Casos diaris per 1.000.000 (mitjana mòbil de 7 dies)',
title = 'Casos diaris per 1.000.000',
subtitle = 'Mitjana mòbil de 7 dies') +
theme(legend.direction = 'horizontal',
legend.position = 'top')
covid19::plot_day_zero(countries = c('Japan', 'South Korea', 'Singapore', 'Hong Kong'),
roll = 7,
deaths = F,
# pop = T,
day0 = 0,
ylog = F,
calendar = T,
cumulative = F) +
labs(x = 'Data',
y = 'Casos diaris (mitjana mòbil de 3 dies)',
title = 'Casos diaris',
subtitle = 'Mitjana mòbil de 3 dies') +
facet_wrap(~geo, scales = 'free_y') +
theme(legend.position = 'none')
pd <- esp_df %>%
arrange(date) %>%
group_by(date) %>%
summarise(deaths_non_cum = sum(deaths_non_cum),
cases_non_cum = sum(cases_non_cum)) %>%
ungroup %>%
mutate(dow = weekdays(date)) %>%
mutate(week = isoweek(date)) %>%
group_by(week) %>%
mutate(start_date = min(date)) %>%
ungroup %>%
filter(date >= '2020-03-09')
pd$dow <- factor(pd$dow,
levels = c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
'Saturday', 'Sunday'),
labels = c('Lunes', 'Martes', 'Miércoles', 'Jueves', 'Viernes',
'Sábado', 'Domingo'))
n_cols <- length(unique(pd$start_date))
cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 9, name = 'Spectral'))(n_cols)
pd$start_date <- factor(pd$start_date)
ggplot(data = pd,
aes(x = dow,
y = cases_non_cum,
group = week,
color = start_date)) +
geom_line(size = 4) +
geom_point(size = 4) +
scale_color_manual(name = 'Primer día\nde la semana',
values = cols) +
theme_simple() +
labs(x = 'Día de la semana',
y = 'Muertes')
pd <- esp_df %>%
mutate(geo = ifelse(ccaa == 'País Vasco', 'Basque country', 'Rest of Spain')) %>%
group_by(geo, date) %>%
summarise(deaths = sum(deaths)) %>%
ungroup
pp <- esp_pop %>%
mutate(geo = ifelse(ccaa == 'País Vasco', 'Basque country', 'Rest of Spain')) %>%
group_by(geo) %>%
summarise(pop = sum(pop))
pd <- left_join(pd, pp) %>% mutate(pk = deaths / pop * 100000)
ggplot(data = pd %>% filter(pk > 0.1),
aes(x = date,
y = pk,
color = geo)) +
geom_line() +
labs(x = 'Date',
y = 'Cumulative deaths per 100,000') +
scale_color_manual(name = '',
values = c('red', 'black')) +
theme_simple()
countries <- c(
'Spain',
'US',
'France',
# 'Portugal',
'Italy',
'China'
)
districts <- c('Lombardia', 'Cataluña',
'New York',
# 'Hubei',
'CyL',
'CLM',
# 'Washington',
'La Rioja',
'Madrid')
plot_day_zero(countries = countries,
districts = districts,
ylog = F,
day0 = 1,
cumulative = F,
time_before = 0,
roll = 3,
deaths = T,
pop = T,
by_district = T,
point_alpha = 0,
line_size = 3,
color_var = 'geo')
dir.create('/tmp/totesmou')
plot_day_zero(countries = c('Spain', 'Italy', max_date = Sys.Date()-1),
point_size = 2, calendar = T)
ggsave('/tmp/totesmou/1_italy_vs_spain.png',
height = 5.6,
width = 9.6)
plot_day_zero(countries = c('Spain', 'Italy'),
point_size = 2, calendar = F)
ggsave('/tmp/totesmou/2_italy_vs_spain_temps_ajustat.png',
height = 5.6,
width = 9.6)
plot_day_zero(countries = c('Spain', 'Italy'),
point_size = 2, calendar = T, deaths = T, day0 = 10)
plot_day_zero(countries = c('Spain', 'Italy'),
point_size = 2, calendar = F, deaths = T, day0 = 10)
plot_day_zero(countries = c('Spain', 'Italy'),
point_size = 2, calendar = F, deaths = T, day0 = 10, pop = T)
plot_day_zero(countries = c('Spain', 'Italy'),
point_size = 2, calendar = F, deaths = T, day0 = 1, pop = T, roll = 3, roll_fun = 'mean')
plot_day_zero(countries = c('Spain', 'Italy'),
districts = c('Cataluña', 'Madrid',
'Lombardia', 'Emilia-Romagna'),
by_district = T,
day0 = 10,
pop = T)
plot_day_zero(countries = c('Spain', 'Italy'),
districts = c('Cataluña', 'Madrid',
'Lombardia', 'Emilia-Romagna'),
by_district = T,
deaths = T,
day0 = 1,
pop = T)
plot_day_zero(countries = c('Spain', 'Italy'),
districts = c('Cataluña', 'Madrid',
'Lombardia', 'Emilia-Romagna'),
by_district = T,
deaths = T,
roll = 3,
roll_fun = 'mean',
day0 = 1,
ylog = F,
pop = T, calendar = T)
plot_day_zero(countries = c('Spain', 'Italy', 'US'),
districts = c('Cataluña', 'Madrid',
'New York',
'Lombardia', 'Emilia-Romagna'),
by_district = T,
deaths = T,
roll = 3,
roll_fun = 'mean',
day0 = 1,
pop = T)
plot_day_zero(countries = c('Spain', 'Italy', 'US'),
districts = c('Cataluña', 'Madrid',
'New York',
'Lombardia', 'Emilia-Romagna'),
by_district = T,
deaths = T,
# roll = 7,
roll_fun = 'mean',
day0 = 1,
pop = F)
plot_day_zero(color_var = 'iso', by_district = T,
deaths = T,
day0 = 1,
alpha = 0.6,
point_alpha = 0,
calendar = T,
countries = c('Spain', 'Italy'),
pop = T)
place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid',
# ifelse(x == 'Cataluña', 'Cataluña',
'Altres CCAA')
# )
}
place_transform_ita <- function(x){
ifelse(x == 'Lombardia', 'Lombardia',
# ifelse(x == 'Emilia Romagna', 'Emilia Romagna',
'Altres regions italianes')
# )
}
pd <- esp_df %>% mutate(country = 'Espanya') %>%
mutate(ccaa = place_transform(ccaa)) %>%
bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa),
country = 'Itàlia')) %>%
group_by(country, ccaa, date) %>%
summarise(cases = sum(cases),
uci = sum(uci),
deaths = sum(deaths),
cases_non_cum = sum(cases_non_cum),
deaths_non_cum = sum(deaths_non_cum),
uci_non_cum = sum(uci_non_cum)) %>%
left_join(esp_pop %>%
mutate(ccaa = place_transform(ccaa)) %>%
bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>%
group_by(ccaa) %>%
summarise(pop = sum(pop))) %>%
mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>%
group_by(country, date) %>%
mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100,
p_deaths_country = deaths / sum(deaths) * 100)
pd$ccaa <- factor(pd$ccaa,
levels = c('Madrid',
# 'Cataluña',
'Altres CCAA',
'Lombardia',
# 'Emilia Romagna',
'Altres regions italianes'))
cols <- c(
rev(brewer.pal(n = 3, 'Reds'))[1:2],
rev(brewer.pal(n = 3, 'Blues'))[1:2]
)
label_data <- pd %>%
filter(country %in% c('Itàlia', 'Espanya')) %>%
group_by(country) %>%
filter(date == max(date)) %>%
mutate(label = gsub('\nitalianas', '', gsub(' ', '\n', ccaa))) %>%
mutate(x = date - 2,
y = p_deaths_country +
ifelse(p_deaths_country > 50, 10, -9))
ggplot(data = pd %>% group_by(country) %>% mutate(start_day = dplyr::first(date[deaths >=50])) %>% filter(date >= start_day),
aes(x = date,
y = p_deaths_country,
color = ccaa,
group = ccaa)) +
# geom_bar(stat = 'identity',
# position = position_dodge(width = 0.99)) +
scale_fill_manual(name = '', values = cols) +
scale_color_manual(name = '', values = cols) +
geom_line(size = 2,
aes(color = ccaa)) +
# geom_point(size = 3,
# aes(color = ccaa)) +
facet_wrap(~country) +
# xlim(as.Date('2020-03-09'),
# Sys.Date()-1) +
theme_simple() +
geom_hline(yintercept = 50, lty = 2, alpha = 0.6) +
# geom_line(lty = 2, alpha = 0.6) +
labs(x = 'Data',
y = 'Percentatge de morts',
title = 'Percentatge de morts: regió més afectada vs resta del pais',
subtitle = 'A partir del primer día a cada país amb 50 morts acumulades') +
theme(legend.position = 'top',
strip.text = element_text(size = 30),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(nrow = 2,
keywidth = 5)) +
geom_text(data = label_data,
aes(x = x-2,
y = y,
label = label,
color = ccaa),
size = 7,
show.legend = FALSE)
# Same cahrt as previous, but one shared axis
place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid',
# ifelse(x == 'Cataluña', 'Cataluña',
'Rest of Spain')
# )
}
place_transform_ita <- function(x){
ifelse(x == 'Lombardia', 'Lombardia',
# ifelse(x == 'Emilia Romagna', 'Emilia Romagna',
'Rest of Italy')
# )
}
pd <- esp_df %>% mutate(country = 'Spain') %>%
mutate(ccaa = place_transform(ccaa)) %>%
bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa),
country = 'Italy')) %>%
group_by(country, ccaa, date) %>%
summarise(cases = sum(cases),
uci = sum(uci),
deaths = sum(deaths),
cases_non_cum = sum(cases_non_cum),
deaths_non_cum = sum(deaths_non_cum),
uci_non_cum = sum(uci_non_cum)) %>%
left_join(esp_pop %>%
mutate(ccaa = place_transform(ccaa)) %>%
bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>%
group_by(ccaa) %>%
summarise(pop = sum(pop))) %>%
mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>%
group_by(country, date) %>%
mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100,
p_deaths_country = deaths / sum(deaths) * 100)
pd$ccaa <- factor(pd$ccaa,
levels = c('Madrid',
# 'Cataluña',
'Rest of Spain',
'Lombardia',
# 'Emilia Romagna',
'Rest of Italy'))
cols <- c(
rev(brewer.pal(n = 3, 'Reds'))[1:2],
rev(brewer.pal(n = 3, 'Blues'))[1:2]
)
label_data <- pd %>%
filter(country %in% c('Italy', 'Spain')) %>%
group_by(country) %>%
filter(date == max(date)) %>%
# mutate(label = gsub('\nitalianas', '', gsub(' ', '\n', ccaa))) %>%
mutate(x = date - 2,
y = p_deaths_country +
ifelse(p_deaths_country > 50, 10, -9)) %>%
ungroup
ggplot(data = pd %>% group_by(country) %>% mutate(start_day = dplyr::first(date[deaths >=30])) %>% filter(date >= start_day),
aes(x = date,
y = p_deaths_country,
color = ccaa,
group = ccaa)) +
# geom_bar(stat = 'identity',
# position = position_dodge(width = 0.99)) +
scale_fill_manual(name = '', values = cols) +
scale_color_manual(name = '', values = cols) +
geom_line(size = 2,
aes(color = ccaa)) +
# geom_point(size = 3,
# aes(color = ccaa)) +
# facet_wrap(~country, scales = 'free_x') +
# xlim(as.Date('2020-03-09'),
# Sys.Date()-1) +
theme_simple() +
geom_hline(yintercept = 50, lty = 2, alpha = 0.6) +
# geom_line(lty = 2, alpha = 0.6) +
labs(x = 'Date',
y = 'Percentage of deaths',
title = 'Percentage of country\'s cumulative COVID-19 deaths by geography',
subtitle = 'Starting at first day with 50 or more cumulative deaths') +
theme(legend.position = 'top',
strip.text = element_text(size = 30),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(nrow = 2,
keywidth = 5))# +
# geom_text(data = label_data,
# aes(x = x-2,
# y = y,
# label = label,
# color = ccaa),
# size = 7,
# show.legend = FALSE)
Same chart as above but absolute numbers
# Same cahrt as previous, but one shared axis
place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid',
# ifelse(x == 'Cataluña', 'Cataluña',
'Rest of Spain')
# )
}
place_transform_ita <- function(x){
ifelse(x == 'Lombardia', 'Lombardia',
# ifelse(x == 'Emilia Romagna', 'Emilia Romagna',
'Rest of Italy')
# )
}
pd <- esp_df %>% mutate(country = 'Spain') %>%
mutate(ccaa = place_transform(ccaa)) %>%
bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa),
country = 'Italy')) %>%
group_by(country, ccaa, date) %>%
summarise(cases = sum(cases),
uci = sum(uci),
deaths = sum(deaths),
cases_non_cum = sum(cases_non_cum),
deaths_non_cum = sum(deaths_non_cum),
uci_non_cum = sum(uci_non_cum)) %>%
left_join(esp_pop %>%
mutate(ccaa = place_transform(ccaa)) %>%
bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>%
group_by(ccaa) %>%
summarise(pop = sum(pop))) %>%
mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>%
group_by(country, date) %>%
mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100,
p_deaths_country = deaths / sum(deaths) * 100)
pd$ccaa <- factor(pd$ccaa,
levels = rev(c('Madrid',
# 'Cataluña',
'Rest of Spain',
'Lombardia',
# 'Emilia Romagna',
'Rest of Italy')))
cols <- c(
rev(brewer.pal(n = 3, 'Reds'))[1:2],
rev(brewer.pal(n = 3, 'Blues'))[1:2]
)
label_data <- pd %>%
filter(country %in% c('Italy', 'Spain')) %>%
group_by(country) %>%
filter(date == max(date)) %>%
# mutate(label = gsub('\nitalianas', '', gsub(' ', '\n', ccaa))) %>%
mutate(x = date - 2,
y = p_deaths_country +
ifelse(p_deaths_country > 50, 10, -9)) %>%
ungroup
# Get moving
ma <- function(x, n = 2){
if(length(x) >= n){
stats::filter(x, rep(1 / n, n), sides = 1)
} else {
new_n <- length(x)
stats::filter(x, rep(1 / new_n, new_n), sides = 1)
}
}
ggplot(data = pd %>% group_by(country) %>%
mutate(start_day = dplyr::first(date[deaths >=1])) %>%
filter(date >= start_day) %>%
mutate(days_since = as.numeric(date - start_day)) %>%
ungroup %>% arrange(date) %>%
group_by(country, ccaa) %>%
mutate(rolling_average = ma(deaths_non_cum, n = 3)) %>%
ungroup,
aes(x = date,
y = rolling_average,
color = ccaa,
group = ccaa)) +
# geom_bar(stat = 'identity',
# position = position_dodge(width = 0.99)) +
scale_fill_manual(name = '', values = cols) +
scale_color_manual(name = '', values = cols) +
geom_line(size = 2,
aes(color = ccaa)) +
# geom_point(size = 3,
# aes(color = ccaa)) +
# scale_y_log10(limits = c(1.5, 1000)) +
# scale_y_log10() +
facet_wrap(~country) +
# xlim(as.Date('2020-03-09'),
# Sys.Date()-1) +
theme_simple() +
# geom_hline(yintercept = 50, lty = 2, alpha = 0.6) +
# geom_line(lty = 2, alpha = 0.6) +
labs(x = 'Date',
y = 'Deaths (log-scale)',
title = 'Daily COVID-19 deaths by geography',
subtitle = '3-day rolling average') +
theme(legend.position = 'top',
plot.title = element_text(size = 30),
plot.subtitle = element_text(size = 24),
strip.text = element_text(size = 30, hjust = 0.5),
legend.text = element_text(size = 20)) +
guides(color = guide_legend(nrow = 2,
keywidth = 5))
ggsave('~/Desktop/madlom.png')
roll_curve <- function(data,
n = 4,
deaths = FALSE,
scales = 'fixed',
nrow = NULL,
ncol = NULL,
pop = FALSE){
# Create the n day rolling avg
ma <- function(x, n = 5){
if(length(x) >= n){
stats::filter(x, rep(1 / n, n), sides = 1)
} else {
new_n <- length(x)
stats::filter(x, rep(1 / new_n, new_n), sides = 1)
}
}
pd <- data
if(deaths){
pd$var <- pd$deaths_non_cum
} else {
pd$var <- pd$cases_non_cum
}
if('ccaa' %in% names(pd)){
pd$geo <- pd$ccaa
} else {
pd$geo <- pd$iso
}
if(pop){
# try to get population
if(any(pd$geo %in% unique(esp_df$ccaa))){
right <- esp_pop
right_var <- 'ccaa'
} else {
right <- world_pop
right_var <- 'iso'
}
pd <- pd %>% left_join(right %>% dplyr::select(all_of(right_var), pop),
by = c('geo' = right_var)) %>%
mutate(var = var / pop * 100000)
}
pd <- pd %>%
arrange(date) %>%
group_by(geo) %>%
mutate(with_lag = ma(var, n = n))
ggplot() +
geom_bar(data = pd,
aes(x = date,
y = var),
stat = 'identity',
fill = 'grey',
alpha = 0.8) +
geom_area(data = pd,
aes(x = date,
y = with_lag),
color = 'red',
fill = 'red',
alpha = 0.6) +
facet_wrap(~geo, scales = scales, nrow = nrow, ncol = ncol) +
theme_simple() +
labs(x = '',
y = ifelse(deaths, 'Deaths', 'Cases'),
title = paste0('Daily (non-cumulative) ', ifelse(deaths, 'deaths', 'cases'),
ifelse(pop, ' (per 100,000)', '')),
subtitle = paste0('Data as of ', max(data$date),
'.\nRed line = ', n, ' day rolling average. Grey bar = day-specific value.')) +
theme(axis.text.x = element_text(size = 7,
angle = 90,
hjust = 0.5, vjust = 1)) #+
# scale_x_date(name ='',
# breaks = sort(unique(pd$date)),
# labels = format(sort(unique(pd$date)), '%b %d'))
# scale_y_log10()
}
this_ccaa <- 'Cataluña'
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
roll_curve(plot_data, scales = 'fixed') + theme(strip.text = element_text(size = 30))
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
roll_curve(plot_data, deaths = T, scales = 'fixed') + theme(strip.text = element_text(size = 30))
african_countries <- world_pop$country[world_pop$sub_region %in% c('Sub-Saharan Africa')]
pd <- plot_day_zero(countries = c(african_countries),
day0 = 1,
max_date = Sys.Date() - 46,
ylog = F) +
ylim(0, 5000)
pd
pd <- plot_day_zero(countries = c(african_countries),
day0 = 1,
max_date = Sys.Date(),
ylog = F) +
ylim(0, 5000)
pd
pd <- plot_day_zero(countries = c(african_countries),
day0 = 1,
calendar = T,
max_date = Sys.Date(),
ylog = T) + theme(legend.position = 'none')
pd
pd <- plot_day_zero(countries = c(african_countries),
day0 = 10,
max_date = Sys.Date() - 13)
pd
pd <- plot_day_zero(countries = c(african_countries),
day0 = 10,
max_date = Sys.Date() - 6)
pd
pd <- plot_day_zero(countries = c(african_countries),
day0 = 10,
max_date = Sys.Date(),
ylog = F)
pd
latam_countries <- world_pop$country[world_pop$sub_region %in% c('Latin America and the Caribbean')]
latam_countries <- latam_countries[!latam_countries %in% c('Guyana')]
pd <- plot_day_zero(countries = c(latam_countries),
day0 = 10,
max_date = Sys.Date() - 13)
pd
pd <- plot_day_zero(countries = c(latam_countries),
day0 = 10,
max_date = Sys.Date() - 6)
pd
pd <- plot_day_zero(countries = c(latam_countries),
day0 = 10,
max_date = Sys.Date())
pd
Latin America and Africa vs Europe
isos <- sort(unique(world_pop$sub_region))
keep_countries <- world_pop$country[world_pop$sub_region %in% c('Latin America and the Caribbean', 'Sub-Saharan Africa') |
world_pop$region %in% 'Europe']
keep_countries <- keep_countries[!keep_countries %in% c('Guyana')]
pd <- df_country %>% ungroup %>%
filter(country %in% keep_countries) %>%
dplyr::select(-country) %>%
left_join(world_pop) %>%
group_by(iso) %>%
mutate(day0 = min(date[cases >= 10])) %>%
ungroup %>%
mutate(days_since = date - day0) %>%
filter(days_since >= 0)
cols <- c( 'black')
g <- ggplot(data = pd,
aes(x = days_since,
y = cases,
group = country,
color = region)) +
geom_line(data = pd %>% filter(region == 'Europe'),
alpha = 0.6) +
scale_y_log10() +
scale_color_manual(name = '', values = cols) +
theme_simple() +
labs(x = 'Days since first day at 10 cases') +
theme(legend.position = 'top')
g
cols <- c('darkred', 'black')
g +
geom_line(data = pd %>% filter(region == 'Africa'),
# alpha = 1,
size = 1.5,
alpha = 0.8) +
scale_y_log10() +
scale_color_manual(name = '', values = cols)
cols <- c( 'darkorange', 'black')
g +
geom_line(data = pd %>% filter(sub_region == 'Latin America and the Caribbean'),
# alpha = 1,
size = 1.5,
alpha = 0.8) +
scale_color_manual(name = '', values = cols)
cols <- c('darkred', 'darkorange', 'black')
g +
geom_line(data = pd %>% filter(sub_region != 'Europe'),
# alpha = 1,
size = 1.5,
alpha = 0.8) +
scale_color_manual(name = '', values = cols)
# Assets
pyramid_dir <- '../../data-raw/pyramids/'
pyramid_files <- dir(pyramid_dir)
out_list <- list()
for(i in 1:length(pyramid_files)){
out_list[[i]] <- read_csv(paste0(pyramid_dir, pyramid_files[i])) %>%
mutate(region = gsub('.csv', '', pyramid_files[i]))
}
pyramid <- bind_rows(out_list)
make_pyramid <- function(the_region = 'AFRICA-2019'){
sub_data <- pyramid %>% filter(region == the_region)
sub_data$Age <- factor(sub_data$Age, levels = sub_data$Age)
sub_data <- tidyr::gather(sub_data, key, value, M:F)
ggplot(data = sub_data,
aes(x = Age,
y = value,
fill = key)) +
geom_bar(stat = 'identity',
position = position_dodge(),
alpha = 0.6,
color = 'black') +
scale_fill_manual(name = '', values = c('darkorange', 'lightblue')) +
theme_simple() +
labs(x = 'Age group',
y = 'Population') +
theme(legend.position = 'top')
}
make_pyramid_overlay <- function(){
sub_data <- pyramid %>% filter(region %in% c('EUROPE-2019',
'AFRICA-2019',
'LATIN AMERICA AND THE CARIBBEAN-2019')) %>%
mutate(region = gsub('-2019', '', region))
sub_data$Age <- factor(sub_data$Age, levels = unique(sub_data$Age))
sub_data <- tidyr::gather(sub_data, key, value, M:F) %>%
group_by(Age, region) %>%
summarise(value = sum(value)) %>%
ungroup %>%
group_by(region) %>%
mutate(p = value / sum(value) * 100) %>%
ungroup
ggplot(data = sub_data,
aes(x = Age,
y = p,
color = region,
group = region,
fill = region)) +
geom_area(position = position_dodge(),
alpha = 0.6) +
scale_fill_manual(name = '',
values = c('darkred', 'darkorange', 'black')) +
scale_color_manual(name = '',
values = c('darkred', 'darkorange', 'black')) +
theme_simple() +
theme(legend.position = 'top') +
labs(x = 'Age group', y = 'Percentage')
}
make_pyramid(the_region = 'Spain-2019') + labs(title = 'Spain')
make_pyramid(the_region = 'Italy-2019') + labs(title = 'Italy')
make_pyramid(the_region = 'EUROPE-2019') + labs(title = 'Europe')
make_pyramid(the_region = 'Kenya-2019') + labs(title = 'Kenya')
make_pyramid(the_region = 'Mozambique-2019') + labs(title = 'Mozambique')
make_pyramid(the_region = 'Tanzania-2019') + labs(title = 'Tanzania')
make_pyramid(the_region = 'Guatemala-2019') + labs(title = 'Guatemala')
make_pyramid(the_region = 'AFRICA-2019') + labs(title = 'Africa')
make_pyramid(the_region = 'LATIN AMERICA AND THE CARIBBEAN-2019') + labs(title = 'Latin America and the Caribbean')
make_pyramid_overlay() + labs(title = 'Population distribution by region')
pyramid <- pyramid %>%
mutate(old = Age %in% c('80-84', '85-89', '90-94','95-99', '100+'))
pyramid %>%
group_by(region, old) %>%
summarise(n = sum(M) + sum(F)) %>%
ungroup %>%
group_by(region) %>%
mutate(p = n / sum(n) * 100) %>%
filter(old)
plot_day_zero(countries = c('South Africa', 'Spain'), day0 = 1, calendar = T)
plot_day_zero(countries = c('Kenya', 'Italy'), day0 = 10, calendar = F)
plot_day_zero(countries = c('China', 'Italy', 'Spain'),
districts = c('Hubei', 'Lombardia', 'Cataluña', 'Madrid'),
by_district = T,
point_alpha = 0,
day0 = 5,
pop = F,
deaths = T,
ylog = T,
calendar = F,
roll = 5)
# cat_transform <- function(x){ifelse(x == 'Catalunya', 'Cataluña', x)}
cat_transform <- function(x){return(x)}
pd <- por_df %>% mutate(country = 'Portugal') %>%
bind_rows(esp_df %>% mutate(country = 'Spain')) %>%
bind_rows(fra_df %>% mutate(country = 'France')) %>%
bind_rows(ita %>% mutate(country = 'Italy')) %>%
bind_rows(
df %>% filter(country == 'Andorra') %>%
mutate(ccaa = 'Andorra')
)
keep_date = pd %>% group_by(country) %>% summarise(max_date= max(date)) %>% summarise(x = min(max_date)) %>% .$x
pd <- pd %>%
mutate(ccaa = cat_transform(ccaa)) %>%
group_by(ccaa) %>%
filter(date == keep_date) %>%
# filter(date == '2020-03-27') %>%
ungroup %>%
dplyr::select(date, ccaa, deaths, deaths_non_cum,
cases, cases_non_cum) %>%
left_join(regions_pop %>%
bind_rows(
world_pop %>% filter(country == 'Andorra') %>% dplyr::mutate(ccaa = country) %>%
dplyr::select(-region, -sub_region)
)) %>%
mutate(cases_per_million = cases / pop * 1000000,
deaths_per_million = deaths / pop * 1000000) %>%
mutate(cases_per_million_non_cum = cases_non_cum / pop * 1000000,
deaths_per_million_non_cum = deaths_non_cum / pop * 1000000)
map_esp1 <- map_esp
map_esp1@data <- map_esp1@data %>% dplyr::select(ccaa)
map_fra1 <- map_fra
map_fra1@data <- map_fra1@data %>% dplyr::select(ccaa = NAME_1)
map_por1 <- map_por
map_por1@data <- map_por1@data %>% dplyr::select(ccaa = CCDR)
map_ita1 <- map_ita
map_ita1@data <- map_ita1@data %>% dplyr::select(ccaa)
map_and1 <- map_and
map_and1@data <- map_and1@data %>% dplyr::select(ccaa = NAME_0)
map <-
rbind(map_esp1,
map_por1,
map_fra1,
map_ita1,
map_and1)
# Remove areas not of interest
centroids <- coordinates(map)
centroids <- data.frame(centroids)
names(centroids) <- c('x', 'y')
centroids$ccaa <- map@data$ccaa
centroids <- left_join(centroids, pd)
# map <- map_sp <- map[centroids$y >35 & centroids$x > -10 &
# centroids$x < 8 & (centroids$y < 43 | map@data$ccaa %in% c('Occitanie', 'Nouvelle-Aquitaine') |
# map@data$ccaa %in% esp_df$ccaa),]
# map_sp <- map <- map[centroids$x > -10 & centroids$y <47,]
map_sp <- map <- map[centroids$x > -10 & centroids$y <77,]
# map_sp <- map <- map[centroids$x > -10,]
# fortify
map <- fortify(map, region = 'ccaa')
# join data
map$ccaa <- map$id
map <- left_join(map, pd)
var <- 'deaths_per_million'
map$var <- as.numeric(unlist(map[,var]))
centroids <- centroids[,c('ccaa', 'x', 'y', var)]
centroids <- centroids %>%
filter(ccaa %in% map_sp@data$ccaa)
# cols <- rev(RColorBrewer::brewer.pal(n = 9, name = 'Spectral'))
# cols <- c('#A16928','#bd925a','#d6bd8d','#edeac2','#b5c8b8','#79a7ac','#2887a1')
# cols <- c('#009392','#39b185','#9ccb86','#e9e29c','#eeb479','#e88471','#cf597e')
# cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c')
cols <- rev(colorRampPalette(c('darkred', 'red', 'darkorange', 'orange', 'yellow', 'lightblue'))(10))
g <- ggplot(data = map,
aes(x = long,
y = lat,
group = group)) +
geom_polygon(aes(fill = var),
lwd = 0.3,
# color = 'darkgrey',
color = NA,
size = 0.6) +
scale_fill_gradientn(name = '',
colours = cols) +
# scale_fill_() +
ggthemes::theme_map() +
theme(legend.position = 'bottom',
plot.title = element_text(size = 16)) +
guides(fill = guide_colorbar(direction= 'horizontal',
barwidth = 50,
barheight = 1,
label.position = 'bottom')) +
labs(title = 'Cumulative COVID-19 deaths per million population, western Mediterranean',
subtitle = paste0('Data as of ', format(max(pd$date), '%B %d, %Y'))) +
geom_text(data = centroids,
aes(x = x,
y = y,
group = NA,
label = paste0(ccaa, '\n',
round(deaths_per_million, digits = 2))),
alpha = 0.8,
size = 3)
g
ggsave('/tmp/map_with_borders.png',
height = 8, width = 13)
dir.create('/tmp/animation_map/')
pd <- por_df %>% mutate(country = 'Portugal') %>%
bind_rows(esp_df %>% mutate(country = 'Spain')) %>%
bind_rows(fra_df %>% mutate(country = 'France')) %>%
bind_rows(ita %>% mutate(country = 'Italy')) %>%
bind_rows(
df %>% filter(country == 'Andorra') %>%
mutate(ccaa = 'Andorra')
)
pd %>% group_by(country) %>% summarise(max_date= max(date))
# A tibble: 5 x 2
country max_date
<chr> <date>
1 Andorra 2020-04-30
2 France 2020-04-28
3 Italy 2020-04-30
4 Portugal 2020-04-30
5 Spain NA
unique_dates <- sort(unique(pd$date))
unique_dates <- unique_dates[unique_dates >= '2020-03-01']
popper <- regions_pop %>%
bind_rows(
world_pop %>% filter(country == 'Andorra') %>% dplyr::mutate(ccaa = country) %>%
dplyr::select(-region, -sub_region)
)
map_esp1 <- map_esp
map_esp1@data <- map_esp1@data %>% dplyr::select(ccaa)
map_fra1 <- map_fra
map_fra1@data <- map_fra1@data %>% dplyr::select(ccaa = NAME_1)
map_por1 <- map_por
map_por1@data <- map_por1@data %>% dplyr::select(ccaa = CCDR)
map_ita1 <- map_ita
map_ita1@data <- map_ita1@data %>% dplyr::select(ccaa)
map_and1 <- map_and
map_and1@data <- map_and1@data %>% dplyr::select(ccaa = NAME_0)
map <-
rbind(map_esp1,
map_por1,
map_fra1,
map_ita1,
map_and1)
# Remove areas not of interest
centroids <- coordinates(map)
centroids <- data.frame(centroids)
names(centroids) <- c('x', 'y')
centroids$ccaa <- map@data$ccaa
# map <- map_sp <- map[centroids$y >35 & centroids$x > -10 &
# # centroids$x < 8 &
# (centroids$y < 43 | map@data$ccaa %in% c('Occitanie', 'Nouvelle-Aquitaine') |
# map@data$ccaa %in% esp_df$ccaa),]
# map_sp <- map <- map[centroids$x > -10 & centroids$y <47,]
map_sp <- map <- map[centroids$x > -10 & centroids$y <477,]
# fortify
map <- fortify(map, region = 'ccaa')
for(i in 1:length(unique_dates)){
this_date <- unique_dates[i]
today_map <- map
today_centroids <- centroids
today_pd <- pd
today_pd <- today_pd %>%
mutate(ccaa = cat_transform(ccaa)) %>%
group_by(ccaa) %>%
# filter(date == max(date)) %>%
filter(date == this_date) %>%
ungroup %>%
dplyr::select(date, ccaa, deaths, deaths_non_cum,
cases, cases_non_cum) %>%
left_join(popper) %>%
mutate(cases_per_million = cases / pop * 1000000,
deaths_per_million = deaths / pop * 1000000) %>%
mutate(cases_per_million_non_cum = cases_non_cum / pop * 1000000,
deaths_per_million_non_cum = deaths_non_cum / pop * 1000000)
today_centroids <- left_join(today_centroids, today_pd)
# join data
today_map$ccaa <- today_map$id
today_map <- left_join(today_map, today_pd)
var <- 'deaths_per_million'
today_map$var <- as.numeric(unlist(today_map[,var]))
today_map$var <- ifelse(is.na(today_map$var), 0, today_map$var)
today_centroids <- today_centroids[,c('ccaa', 'x', 'y', var)]
today_centroids <- today_centroids %>%
filter(ccaa %in% today_map$ccaa)
today_centroids$var <- today_centroids[,var]
today_centroids$var <- ifelse(is.na(today_centroids$var), 0, today_centroids$var)
# cols <- rev(RColorBrewer::brewer.pal(n = 9, name = 'Spectral'))
# cols <- c('#A16928','#bd925a','#d6bd8d','#edeac2','#b5c8b8','#79a7ac','#2887a1')
# cols <- c('#009392','#39b185','#9ccb86','#e9e29c','#eeb479','#e88471','#cf597e')
# cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c')
cols <- rev(colorRampPalette(c('darkred', 'red', 'darkorange', 'orange', 'yellow', 'white'))(17))
g <- ggplot(data = today_map,
aes(x = long,
y = lat,
group = group)) +
geom_polygon(aes(fill = var),
lwd = 0.3,
# color = 'darkgrey',
color = NA,
size = 0.6) +
scale_fill_gradientn(name = '',
colours = cols,
breaks = seq(0, 1100, 50),
limits = c(0, 1100)) +
# scale_fill_() +
ggthemes::theme_map() +
theme(legend.position = 'right',
plot.title = element_text(size = 24)) +
guides(fill = guide_colorbar(direction= 'vertical',
barwidth = 1,
barheight = 30,
label.position = 'left')) +
labs(subtitle = 'Cumulative COVID-19 deaths per million population',
title = paste0(format(this_date, '%B %d, %Y'))) +
geom_text(data = today_centroids,
aes(x = x,
y = y,
group = NA,
label = paste0(ccaa, '\n',
round(var, digits = 2))),
alpha = 0.8,
size = 1.5)
ggsave(paste0('/tmp/animation_map/', this_date, '.png'),
plot = g,
height = 6, width = 9)
}
# Command line
cd /tmp/animation_map
mogrify -resize 50% *.png
convert -delay 20 -loop 0 *.png result.gif
df_country %>% filter(country == 'Spain') %>% arrange(date) %>% tail
# A tibble: 6 x 10
# Groups: country [1]
country date cases deaths uci hospitalizations cases_non_cum
<chr> <date> <dbl> <dbl> <dbl> <int> <dbl>
1 Spain 2020-04-26 230663 23521 10371 0 2604
2 Spain 2020-04-27 233507 23822 10436 0 2844
3 Spain 2020-04-28 236827 24275 10720 0 3320
4 Spain 2020-04-29 239946 24543 10776 0 3119
5 Spain 2020-04-30 242988 24824 10860 0 3042
6 Spain NA 0 0 0 0 0
# … with 3 more variables: deaths_non_cum <dbl>, uci_non_cum <dbl>, iso <chr>
pd <- df_country
pd$value <- pd$deaths_non_cum
the_date <- Sys.Date() - 1
pd <- pd %>%
filter(date == the_date) %>%
dplyr::select(country, iso, cases, cases_non_cum,
deaths, value) %>%
dplyr::arrange(desc(value)) %>%
left_join(world_pop %>% dplyr::select(-country)) %>%
mutate(value_per_million = value / pop * 1000000) #%>%
# arrange(desc(value_per_million))
pd <- pd[1:10,]
pd$country <- factor(pd$country, levels = pd$country)
ggplot(data = pd,
aes(x = country,
y = value)) +
geom_bar(stat = 'identity',
fill = 'black') +
theme_simple() +
geom_text(aes(label = value),
nudge_y = -20,
size = 4,
color = 'white') +
labs(title = paste0('Confirmed COVID-19 deaths on ', the_date),
x = '', y = '')
pd
# A tibble: 10 x 10
# Groups: country [10]
country iso cases cases_non_cum deaths value pop region sub_region
<fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 US USA 1.07e6 29515 62996 2029 3.27e8 Ameri… Northern …
2 United… GBR 1.72e5 6040 26842 676 6.65e7 Europe Northern …
3 Brazil BRA 8.72e4 7502 6006 493 2.09e8 Ameri… Latin Ame…
4 France FRA 1.67e5 756 24410 289 6.70e7 Europe Western E…
5 Italy ITA 2.05e5 1872 27967 285 6.04e7 Europe Southern …
6 Spain ESP 2.43e5 3042 24824 281 4.67e7 Europe Southern …
7 Germany DEU 1.63e5 1470 6623 156 8.29e7 Europe Western E…
8 Canada CAN 5.45e4 1592 3310 155 3.71e7 Ameri… Northern …
9 Mexico MEX 1.92e4 1425 1859 127 1.26e8 Ameri… Latin Ame…
10 Sweden SWE 2.11e4 790 2586 124 1.02e7 Europe Northern …
# … with 1 more variable: value_per_million <dbl>
Deaths per million yesterday per CCAA
pd <- esp_df
pd$value <- pd$deaths_non_cum
the_date <- max(pd$date)
pd <- pd %>%
filter(date == max(date)) %>%
dplyr::select(ccaa, cases, cases_non_cum,
deaths, value) %>%
dplyr::arrange(desc(value)) %>%
left_join(esp_pop)%>%
mutate(value_per_million = value / pop * 1000000) #%>%
# arrange(desc(value_per_million))
pd <- pd[1:10,]
pd$ccaa <- factor(pd$ccaa, levels = pd$ccaa)
ggplot(data = pd,
aes(x = ccaa,
y = value)) +
geom_bar(stat = 'identity',
fill = 'black') +
theme_simple() +
geom_text(aes(label = value),
nudge_y = -20,
size = 4,
color = 'white')
pd
# A tibble: 10 x 7
ccaa cases cases_non_cum deaths value pop value_per_million
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 <NA> NA NA NA NA NA NA
2 <NA> NA NA NA NA NA NA
3 <NA> NA NA NA NA NA NA
4 <NA> NA NA NA NA NA NA
5 <NA> NA NA NA NA NA NA
6 <NA> NA NA NA NA NA NA
7 <NA> NA NA NA NA NA NA
8 <NA> NA NA NA NA NA NA
9 <NA> NA NA NA NA NA NA
10 <NA> NA NA NA NA NA NA
dir.create('/tmp/animation_deaths')
dates <- seq(as.Date('2020-03-17'), (Sys.Date()-1), by = 1)
for(i in 1:length(dates)){
this_date <- dates[i]
pd <- df_country
pd$value <- pd$deaths_non_cum
pd <- pd %>%
filter(date == max(this_date)) %>%
dplyr::select(country, cases, cases_non_cum,
deaths, value) %>%
dplyr::arrange(desc(value))
pd <- pd[1:10,]
pd <- pd %>% filter(value > 0)
pd$country <- gsub(' ', '\n', pd$country)
pd$country <- factor(pd$country, levels = pd$country)
pd$color_var <- pd$country == 'Spain'
if('Spain' %in% pd$country){
cols <- rev(c('darkred', 'black'))
} else {
cols <- 'black'
}
g <- ggplot(data = pd,
aes(x = country,
y = value)) +
geom_bar(stat = 'identity',
aes(fill = color_var),
alpha = 0.8,
show.legend = FALSE) +
theme_simple() +
geom_text(aes(label = value),
nudge_y = max(pd$value) * .05,
size = 5,
color = 'black') +
labs(title = 'Daily (non-cumulative) COVID-19 deaths',
subtitle = format(this_date, '%B %d')) +
labs(x = 'Country',
y = 'Deaths') +
theme(axis.text = element_text(size = 15),
plot.subtitle = element_text(size = 20)) +
scale_fill_manual(name ='',
values = cols) +
ylim(0, 900)
ggsave(filename = paste0('/tmp/animation_deaths/', this_date, '.png'),
g)
}
# Command line
cd /tmp/animation_deaths
mogrify -resize 50% *.png
convert -delay 50 -loop 0 *.png result.gif
pd <- by_country <- esp_df %>% mutate(country = 'Spain') %>%
bind_rows(ita %>% mutate(country = 'Italy')) %>%
bind_rows(por_df %>% mutate(country = 'Portugal')) %>%
bind_rows(fra_df %>% mutate(country = 'France'))
pd$value <- pd$deaths_non_cum
max_date <- pd %>% group_by(country) %>% summarise(d = max(date)) %>% ungroup %>% summarise(d = min(d)) %>% .$d
# pd$value <- ifelse(is.na(pd$value), 0, pd$value)
left <- expand.grid(date = seq(min(pd$date),
max(pd$date),
by = 1),
ccaa = sort(unique(pd$ccaa)))
Error in seq.int(0, to0 - from, by): 'to' must be a finite number
right <- pd %>% dplyr::select(date, ccaa, value)
pd <- left_join(left, right) %>% mutate(value = ifelse(is.na(value), NA, value))
Error in left_join(left, right): object 'left' not found
pd <- left_join(pd, by_country %>% dplyr::distinct(country, ccaa)) %>%
filter(date <= max_date) %>%
filter(value > 0)
the_limits <- c(1, 600)
the_breaks <- c(1, seq(100, 600, length = 6)) #seq(0, 600, length = 7)
pd$ccaa <- factor(pd$ccaa, levels = rev(unique(sort(pd$ccaa))))
ggplot(data = pd,
aes(x = date,
y = ccaa,
color = value,
size = value)) +
# geom_tile(color = 'white') +
geom_point(alpha = 0.8) +
# geom_tile() +
scale_color_gradientn(colors = rev(colorRampPalette(brewer.pal(n = 8, 'Spectral'))(5)),
name = '',
limits = the_limits,
breaks = the_breaks) +
# scale_fill_gradientn(colors = rev(colorRampPalette(brewer.pal(n = 8, 'Spectral'))(5)),
# name = '',
# limits = the_limits,
# breaks = the_breaks) +
scale_size_area(name = '', limits = the_limits, breaks = the_breaks, max_size = 10) +
theme_simple() +
facet_wrap(~country, scales = 'free_y') +
theme(strip.text = element_text(size = 20),
axis.title = element_blank(),
axis.text = element_text(size = 10),
axis.text.x = element_text(size = 12)) +
guides(color = guide_legend(), size = guide_legend()) +
labs(title = 'Daily (non-cumulative) COVID-19 deaths by sub-state regions',
caption = paste0('Data as of ', max_date))
Error: Faceting variables must have at least one value
ggsave('/tmp/1.png',
width = 10,
height = 8)
Error: Faceting variables must have at least one value
pd <- by_country <- esp_df %>% mutate(country = 'Spain') %>% bind_rows(ita %>% mutate(country = 'Italy'))
poppy <- bind_rows(ita_pop, esp_pop)
pd <- pd %>% left_join(poppy)
pd$value <- pd$deaths_non_cum / pd$pop * 1000000
max_date <- pd %>% group_by(country) %>% summarise(d = max(date)) %>% ungroup %>% summarise(d = min(d)) %>% .$d
# pd$value <- ifelse(is.na(pd$value), 0, pd$value)
left <- expand.grid(date = seq(min(pd$date),
max(pd$date),
by = 1),
ccaa = sort(unique(pd$ccaa)))
Error in seq.int(0, to0 - from, by): 'to' must be a finite number
right <- pd %>% dplyr::select(date, ccaa, value)
pd <- left_join(left, right) %>% mutate(value = ifelse(is.na(value), NA, value))
Error in left_join(left, right): object 'left' not found
pd <- left_join(pd, by_country %>% dplyr::distinct(country, ccaa)) %>%
filter(date <= max_date) %>%
filter(value > 0)
the_limits <- c(1, 60)
the_breaks <- c(1, seq(10, 60, length = 6)) #seq(0, 600, length = 7)
pd$ccaa <- factor(pd$ccaa, levels = rev(unique(sort(pd$ccaa))))
ggplot(data = pd,
aes(x = date,
y = ccaa,
color = value,
size = value)) +
# geom_tile(color = 'white') +
geom_point(alpha = 0.8) +
scale_color_gradientn(colors = rev(colorRampPalette(brewer.pal(n = 8, 'Spectral'))(5)),
name = '',
limits = the_limits,
breaks = the_breaks) +
scale_size_area(name = '', limits = the_limits, breaks = the_breaks, max_size = 10) +
theme_simple() +
facet_wrap(~country, scales = 'free') +
theme(strip.text = element_text(size = 26),
axis.title = element_blank(),
axis.text = element_text(size = 16),
axis.text.x = element_text(size = 12)) +
guides(color = guide_legend(), size = guide_legend()) +
labs(title = 'Daily COVID-19 deaths per 1,000,000 population by sub-state regions',
caption = paste0('Data as of ', max_date))
Error: Faceting variables must have at least one value
ggsave('/tmp/2.png',
width = 10,
height = 8)
Error: Faceting variables must have at least one value
place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid',
# ifelse(x == 'Cataluña', 'Cataluña',
'Otras CCAA')
# )
}
place_transform_ita <- function(x){
ifelse(x == 'Lombardia', 'Lombardia',
# ifelse(x == 'Emilia Romagna', 'Emilia Romagna',
'Otras regiones italianas')
# )
}
pd <- esp_df %>% mutate(country = 'España') %>%
mutate(ccaa = place_transform(ccaa)) %>%
bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa),
country = 'Italia')) %>%
group_by(country, ccaa, date) %>%
summarise(cases = sum(cases),
uci = sum(uci),
deaths = sum(deaths),
cases_non_cum = sum(cases_non_cum),
deaths_non_cum = sum(deaths_non_cum),
uci_non_cum = sum(uci_non_cum)) %>%
left_join(esp_pop %>%
mutate(ccaa = place_transform(ccaa)) %>%
bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>%
group_by(ccaa) %>%
summarise(pop = sum(pop))) %>%
mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>%
group_by(country, date) %>%
mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100,
p_deaths_country = deaths / sum(deaths) * 100)
pd$ccaa <- factor(pd$ccaa,
levels = c('Madrid',
# 'Cataluña',
'Otras CCAA',
'Lombardia',
# 'Emilia Romagna',
'Otras regiones italianas'))
cols <- c(
rev(brewer.pal(n = 3, 'Reds'))[1:2],
rev(brewer.pal(n = 3, 'Blues'))[1:2]
)
ggplot(data = pd,
aes(x = date,
y = deaths_non_cum_p,
fill = ccaa,
group = ccaa)) +
geom_bar(stat = 'identity',
position = position_dodge(width = 0.99)) +
scale_fill_manual(name = '', values = cols) +
scale_color_manual(name = '', values = cols) +
# geom_line(size = 0.2,
# aes(color = ccaa)) +
xlim(as.Date('2020-03-09'),
Sys.Date()-1) +
theme_simple() +
labs(x = 'Fecha',
y = 'Muertes diarias por 1.000.000',
title = 'Muertes por 1.000.000 habitantes') +
theme(legend.position = 'top') +
geom_text(aes(label = round(deaths_non_cum_p, digits = 1),
color = ccaa,
y = deaths_non_cum_p + 2,
group = ccaa),
size = 2.5,
angle = 90,
position = position_dodge(width = 0.99))
label_data <- pd %>%
filter(country %in% c('Italia', 'España')) %>%
group_by(country) %>%
filter(date == max(date)) %>%
mutate(label = gsub('\nitalianas', '', gsub(' ', '\n', ccaa))) %>%
mutate(x = date - 2,
y = p_deaths_country +
ifelse(p_deaths_country > 50, 10, -10))
ggplot(data = pd %>% group_by(country) %>% mutate(start_day = dplyr::first(date[deaths >=50])) %>% filter(date >= start_day),
aes(x = date,
y = p_deaths_country,
color = ccaa,
group = ccaa)) +
# geom_bar(stat = 'identity',
# position = position_dodge(width = 0.99)) +
scale_fill_manual(name = '', values = cols) +
scale_color_manual(name = '', values = cols) +
geom_line(size = 2,
aes(color = ccaa)) +
geom_point(size = 3,
aes(color = ccaa)) +
facet_wrap(~country, scales = 'free_x') +
# xlim(as.Date('2020-03-09'),
# Sys.Date()-1) +
theme_simple() +
geom_hline(yintercept = 50, lty = 2, alpha = 0.6) +
# geom_line(lty = 2, alpha = 0.6) +
labs(x = 'Fecha',
y = 'Porcentaje de muertes',
title = 'Porcentaje de muertes del país: región más afectada vs. resto del país',
subtitle = 'A partir del primer día en cada país con 50 o más muertes') +
theme(legend.position = 'top',
strip.text = element_text(size = 30),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(nrow = 2,
keywidth = 5)) +
geom_text(data = label_data,
aes(x = x - 2,
y = y,
label = label,
color = ccaa),
size = 6,
show.legend = FALSE)
# Spanish data
a <- esp_df %>%
left_join(esp_pop) %>%
mutate(country = 'Spain')
# Italian data
b <- ita %>%
left_join(ita_pop) %>%
mutate(country = 'Italy')
# Chinese data
d <- df %>% filter(country == 'China') %>%
mutate(cases = cases) %>%
mutate(ccaa = district) %>%
mutate(country = 'China') %>%
left_join(chi_pop)
# join
joined <- bind_rows(a, b, d)
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000
# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
arrange(date) %>%
group_by(ccaa) %>%
mutate(start_deaths = min(date[deaths >= x_deaths]),
start_cases = min(date[cases >= x_cases]),
start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
ungroup %>%
mutate(days_since_start_deaths = date - start_deaths,
days_since_start_cases = date - start_cases,
days_since_start_deaths_pm = date - start_deaths_pm,
days_since_start_cases_pm = date - start_cases_pm)
# Define plot data
pd <- joined %>% filter(days_since_start_deaths_pm >= 0) %>%
mutate(country = ifelse(country == 'China',
'Hubei (China)',
ifelse(country == 'Italy', 'Italia', 'España')))
lombardy_location <- (max(pd_big$days_since_start_deaths_pm[pd_big$ccaa == 'Lombardia']))
Error in eval(expr, envir, enclos): object 'pd_big' not found
# Define label data
label_data <- pd %>% group_by(ccaa) %>% filter(
(
(country == 'Hubei (China)' & days_since_start_deaths_pm == 22) |
(date == max(date) & country == 'España' & deaths_pm > 40 & days_since_start_deaths_pm >= 7) & ccaa != 'CyL' |
(date == max(date) & country == 'Italia' &
ccaa != 'Liguria' & days_since_start_deaths_pm > 15)
))
# Get differential label data based on what to be emphasized
bigs <- c('Madrid', 'Lombardia', 'Hubei')
label_data_big <- label_data %>%
filter(ccaa %in% bigs)
label_data_small <- label_data %>%
filter(!ccaa %in% bigs)
pd_big <- pd %>%
filter(ccaa %in% bigs)
pd_small <- pd %>%
filter(!ccaa %in% bigs)
# cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country)))
# cols <- rainbow(3)
cols <- c('darkred', '#FF6633', '#006666')
ggplot(data = pd_big,
aes(x = days_since_start_deaths_pm,
y = deaths_pm,
group = ccaa)) +
geom_line(aes(color = country),
alpha = 0.9,
size = 2) +
geom_line(data = pd_small,
aes(x = days_since_start_deaths_pm,
y = deaths_pm,
color = country),
alpha = 0.7,
size = 1) +
scale_y_log10() +
scale_color_manual(name = '',
values = c(cols)) +
theme_simple() +
theme(legend.position = 'top') +
labs(x = 'Dias desde "el comienzo del brote"',
y = 'Muertes por millón de habitantes',
title = 'Muertes por 1.000.000 habitantes',
subtitle = paste0('Dia 0 ("comienzo del brote") = primer día a ', x_deaths_pm, ' o más muertes acumuladas por milión de población\nLíneas rojas: CCAA; líneas verde-azules: regiones italianas; línea naranja: Hubei, China'),
caption = '@joethebrew | www.databrew.cc') +
geom_text(data = label_data_big,
aes(x = days_since_start_deaths_pm + 0.6,
y = (deaths_pm + 50),
label = gsub(' ', '\n', ccaa),
color = country),
size = 8,
alpha = 0.9,
show.legend = FALSE) +
geom_text(data = label_data_small,
aes(x = days_since_start_deaths_pm + 0.6,
y = deaths_pm + (log(deaths_pm)/2),
label = gsub(' ', '\n', ccaa),
color = country),
size = 5,
alpha = 0.6,
show.legend = FALSE) +
theme(axis.text = element_text(size = 14),
axis.title = element_text(size = 16),
legend.text = element_text(size = 16),
plot.title = element_text(size = 25)) +
xlim(0, lombardy_location + 5)
Error in limits(c(...), "x"): object 'lombardy_location' not found
# Spanish data
a <- esp_df %>%
left_join(esp_pop) %>%
mutate(country = 'Spain')
# Italian data
b <- ita %>%
left_join(ita_pop) %>%
mutate(country = 'Italy')
# Chinese data
d <- df %>% filter(country == 'China') %>%
mutate(cases = cases) %>%
mutate(ccaa = district) %>%
mutate(country = 'China') %>%
left_join(chi_pop)
# join
joined <- bind_rows(a, b, d)
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000
# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
arrange(date) %>%
group_by(ccaa) %>%
mutate(start_deaths = min(date[deaths >= x_deaths]),
start_cases = min(date[cases >= x_cases]),
start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
ungroup %>%
mutate(days_since_start_deaths = date - start_deaths,
days_since_start_cases = date - start_cases,
days_since_start_deaths_pm = date - start_deaths_pm,
days_since_start_cases_pm = date - start_cases_pm)
# Define plot data
pd <- joined %>% filter(days_since_start_deaths >= 0) %>%
mutate(country = ifelse(country == 'China',
'China',
ifelse(country == 'Italy', 'Italia', 'España')))
# Define label data
label_data <- pd %>% group_by(ccaa) %>% filter(
(
(country == 'China' & deaths >10 & days_since_start_deaths == 29) |
(date == max(date) & country == 'España' & deaths > 90) |
(date == max(date) & country == 'Italia' &
ccaa != 'Liguria' & days_since_start_deaths > 10)
))
# Get differential label data based on what to be emphasized
label_data_big <- label_data %>%
filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
label_data_small <- label_data %>%
filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
pd_big <- pd %>%
filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
pd_small <- pd %>%
filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
# cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country)))
# cols <- rainbow(3)
cols <- c( '#FF6633', 'darkred', '#006666')
ggplot(data = pd_big,
aes(x = days_since_start_deaths,
y = deaths,
group = ccaa)) +
geom_line(aes(color = country),
alpha = 0.9,
size = 2) +
geom_line(data = pd_small,
aes(x = days_since_start_deaths,
y = deaths,
color = country),
alpha = 0.7,
size = 1) +
scale_y_log10() +
scale_color_manual(name = '',
values = c(cols)) +
theme_simple() +
theme(legend.position = 'top') +
labs(x = 'Dias desde el primer día con 5 o más muertes acumuladas',
y = 'Muertes',
title = 'Muertes por COVID-19',
caption = '@joethebrew | www.databrew.cc') +
geom_text(data = label_data_big,
aes(x = days_since_start_deaths + 1.6,
y = ifelse(ccaa == 'Hubei', (deaths -500),
ifelse(ccaa == 'Lombardia', (deaths + 700),
(deaths + 300))),
label = gsub(' ', '\n', ccaa),
color = country),
size = 8,
alpha = 0.9,
show.legend = FALSE) +
geom_text(data = label_data_small,
aes(x = days_since_start_deaths + 1.6,
align = 'left',
y = deaths ,
label = ccaa,
# label = gsub(' ', '\n', ccaa),
color = country),
size = 5,
alpha = 0.6,
show.legend = FALSE) +
theme(axis.text = element_text(size = 14),
axis.title = element_text(size = 20),
legend.text = element_text(size = 16),
plot.title = element_text(size = 30)) +
xlim(0, 50)
# Spanish data
a <- esp_df %>%
left_join(esp_pop) %>%
mutate(country = 'Spain')
# Italian data
b <- ita %>%
left_join(ita_pop) %>%
mutate(country = 'Italy')
# Chinese data
d <- df %>% filter(country == 'China') %>%
mutate(cases = cases) %>%
mutate(ccaa = district) %>%
mutate(country = 'China') %>%
left_join(chi_pop)
# join
joined <- bind_rows(a, b, d)
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000
# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
arrange(date) %>%
group_by(ccaa) %>%
mutate(start_deaths = min(date[deaths >= x_deaths]),
start_cases = min(date[cases >= x_cases]),
start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
ungroup %>%
mutate(days_since_start_deaths = date - start_deaths,
days_since_start_cases = date - start_cases,
days_since_start_deaths_pm = date - start_deaths_pm,
days_since_start_cases_pm = date - start_cases_pm)
# Define plot data
pd <- joined %>% filter(days_since_start_deaths >= 0) %>%
mutate(country = ifelse(country == 'China',
'China',
ifelse(country == 'Italy', 'Italia', 'España')))
add_zero <- function(x, n){
x <- as.character(x)
adders <- n - nchar(x)
adders <- ifelse(adders < 0, 0, adders)
for (i in 1:length(x)){
if(!is.na(x[i])){
x[i] <- paste0(
paste0(rep('0', adders[i]), collapse = ''),
x[i],
collapse = '')
}
}
return(x)
}
# # Define label data
# label_data <- pd %>% group_by(ccaa) %>% filter(
# (
# (country == 'China' & deaths >10 & days_since_start_deaths == 29) |
# (date == max(date) & country == 'España' & deaths > 90) |
# (date == max(date) & country == 'Italia' &
# ccaa != 'Liguria' & days_since_start_deaths > 10)
# ))
# # Get differential label data based on what to be emphasized
# label_data_big <- label_data %>%
# filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
# label_data_small <- label_data %>%
# filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
#
pd_big <- pd %>%
filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
pd_small <- pd %>%
filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
# cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country)))
# cols <- rainbow(3)
cols <- c( '#FF6633', 'darkred', '#006666')
the_dir <- '/tmp/animation/'
dir.create(the_dir)
the_dates <- sort(unique(c(pd_big$date, pd_small$date)))
for(i in 1:length(the_dates)){
the_date <- the_dates[i]
pd_big_sub <- pd_big %>% filter(date <= the_date)
pd_big_current <- pd_big_sub %>% filter(date == the_date)
pd_small_sub <- pd_small %>% filter(date <= the_date)
pd_small_current <- pd_small_sub %>% filter(date == the_date)
label_data_big <-
pd_big_sub %>%
filter(ccaa %in% c('Lombardia', 'Madrid', 'Hubei')) %>%
group_by(ccaa) %>%
filter(date == max(date)) %>%
ungroup %>%
mutate(days_since_start_deaths = ifelse(ccaa == 'Hubei' &
days_since_start_deaths >32,
32,
days_since_start_deaths))
label_data_small <-
pd_small_sub %>%
filter(ccaa %in% c('Emilia Romagna',
'Cataluña',
'CLM',
'País Vasco',
'Veneto',
'Piemonte',
'Henan',
'Heilongjiang')) %>%
group_by(ccaa) %>%
filter(date == max(date))
n_countries <- length(unique(pd_big_sub$country))
if(n_countries == 3){
sub_cols <- cols
}
if(n_countries == 2){
sub_cols <- cols[c(1,3)]
}
if(n_countries == 1){
sub_cols <- cols[1]
}
g <- ggplot(data = pd_big_sub,
aes(x = days_since_start_deaths,
y = deaths,
group = ccaa)) +
geom_line(aes(color = country),
alpha = 0.9,
size = 2) +
geom_line(data = pd_small_sub,
aes(x = days_since_start_deaths,
y = deaths,
color = country),
alpha = 0.7,
size = 1) +
geom_point(data = pd_big_current,
aes(x = days_since_start_deaths,
y = deaths,
color = country),
size = 3) +
geom_point(data = pd_small_current,
aes(x = days_since_start_deaths,
y = deaths,
color = country),
size = 1, alpha = 0.6) +
scale_y_log10(limits = c(5, 4500)) +
scale_color_manual(name = '',
values = sub_cols) +
theme_simple() +
theme(legend.position = 'top') +
labs(x = 'Dias desde el primer día con 5 o más muertes acumuladas',
y = 'Muertes',
title = format(the_date, '%d %b'),
subtitle = 'Muertes por COVID-19',
caption = '@joethebrew | www.databrew.cc') +
geom_text(data = label_data_big,
aes(x = days_since_start_deaths + 1,
y = deaths,
hjust = 0,
label = gsub(' ', '\n', ccaa),
color = country),
size = 8,
alpha = 0.9,
show.legend = FALSE) +
geom_text(data = label_data_small,
aes(x = days_since_start_deaths + 1.6,
y = deaths ,
label = ccaa,
# label = gsub(' ', '\n', ccaa),
color = country),
size = 5,
alpha = 0.6,
show.legend = FALSE) +
theme(axis.text = element_text(size = 14),
axis.title = element_text(size = 20),
legend.text = element_text(size = 16),
plot.title = element_text(size = 35),
plot.subtitle = element_text(size = 24)) +
xlim(0, 38)
message(i)
ggsave(paste0(the_dir, add_zero(i, 3), '.png'),
height = 7,
width = 10.5)
}
# Command line
cd /tmp/animation
mogrify -resize 50% *.png
convert -delay 20 -loop 0 *.png result.gif
# Spanish data
a <- esp_df %>%
left_join(esp_pop) %>%
mutate(country = 'Spain')
joined <- a
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000
# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
arrange(date) %>%
group_by(ccaa) %>%
mutate(start_deaths = min(date[deaths >= x_deaths]),
start_cases = min(date[cases >= x_cases]),
start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
ungroup %>%
mutate(days_since_start_deaths = date - start_deaths,
days_since_start_cases = date - start_cases,
days_since_start_deaths_pm = date - start_deaths_pm,
days_since_start_cases_pm = date - start_cases_pm)
# Define plot data
pd <- joined %>% filter(days_since_start_deaths >= 0) %>%
mutate(country = ifelse(country == 'China',
'China',
ifelse(country == 'Italy', 'Italia', 'España')))
bigs <- c('Madrid', 'Cataluña', 'CLM', 'CyL', 'País Vasco', 'La Rioja')
pd_big <- pd %>%
filter(ccaa %in% bigs)
pd_small <- pd %>%
filter(!ccaa %in% bigs)
# cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country)))
# cols <- rainbow(3)
cols <- colorRampPalette(c('#A16928','#bd925a','#d6bd8d','#edeac2', '#b5c8b8','#79a7ac','#2887a1'))(length(unique(pd$country)))
the_dir <- '/tmp/animation2/'
dir.create(the_dir)
the_dates <- sort(unique(c(pd_big$date, pd_small$date)))
for(i in 1:length(the_dates)){
the_date <- the_dates[i]
pd_big_sub <- pd_big %>% filter(date <= the_date)
pd_big_current <- pd_big_sub %>% filter(date == the_date)
pd_small_sub <- pd_small %>% filter(date <= the_date)
pd_small_current <- pd_small_sub %>% filter(date == the_date)
label_data_big <-
pd_big_sub %>%
filter(ccaa %in% bigs) %>%
group_by(ccaa) %>%
filter(date == max(date)) %>%
ungroup
label_data_small <-
pd_small_sub %>%
group_by(ccaa) %>%
filter(date == max(date))
# sub_cols <- colorRampPalette(c('#A16928','#bd925a','#d6bd8d','#edeac2', '#b5c8b8','#79a7ac','#2887a1'))(length(unique(pd$ccaa)))
sub_cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Dark2'))(length(unique(pd$ccaa)))
# sub_cols <- rainbow((length(unique(pd$ccaa))))
g <- ggplot(data = pd_big_sub,
aes(x = days_since_start_deaths,
y = deaths,
group = ccaa)) +
geom_line(aes(color = ccaa),
alpha = 0.9,
size = 2) +
geom_line(data = pd_small_sub,
aes(x = days_since_start_deaths,
y = deaths,
color = ccaa),
alpha = 0.7,
size = 1) +
geom_point(data = pd_big_current,
aes(x = days_since_start_deaths,
y = deaths,
color = ccaa),
size = 3) +
geom_point(data = pd_small_current,
aes(x = days_since_start_deaths,
y = deaths,
color = ccaa),
size = 1, alpha = 0.6) +
geom_point(data = pd,
aes(x = days_since_start_deaths,
y = deaths,
color = ccaa),
size = 1, alpha = 0.01) +
scale_y_log10(limits = c(5, max(pd$deaths)*1.2),
breaks = c(10, 50, 100, 500, 1000)) +
scale_color_manual(name = '',
values = sub_cols) +
theme_simple() +
theme(legend.position = 'top') +
labs(x = 'Dias desde el primer día con 5 o más muertes acumuladas',
y = 'Muertes',
title = format(the_date, '%d %b'),
subtitle = 'Muertes por COVID-19',
caption = '@joethebrew | www.databrew.cc') +
theme(axis.text = element_text(size = 14),
axis.title = element_text(size = 20),
legend.text = element_text(size = 16),
plot.title = element_text(size = 35),
plot.subtitle = element_text(size = 24)) +
xlim(0, 20) +
theme(legend.position = 'none')
message(i)
if(nrow(label_data_big) > 0){
g <- g +
geom_text(data = label_data_big,
aes(x = days_since_start_deaths + 0.2,
y = deaths,
hjust = 0,
label = gsub(' ', ' ', ccaa),
color = ccaa),
size = 8,
alpha = 0.9,
show.legend = FALSE) +
geom_text(data = label_data_small,
aes(x = days_since_start_deaths + 0.2,
y = deaths ,
label = ccaa,
# label = gsub(' ', '\n', ccaa),
color = ccaa),
size = 5,
alpha = 0.6,
show.legend = FALSE)
}
ggsave(paste0(the_dir, add_zero(i, 3), '.png'),
height = 7,
width = 12)
}
# Command line
cd /tmp/animation
mogrify -resize 50% *.png
convert -delay 25 -loop 0 *.png result.gif
# Spanish data
a <- esp_df %>%
left_join(esp_pop) %>%
mutate(country = 'Spain')
# Italian data
b <- ita %>%
left_join(ita_pop) %>%
mutate(country = 'Italy')
# join
joined <- bind_rows(a, b)
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000
# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
arrange(date) %>%
group_by(ccaa) %>%
mutate(start_deaths = min(date[deaths >= x_deaths]),
start_cases = min(date[cases >= x_cases]),
start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
ungroup %>%
mutate(days_since_start_deaths = date - start_deaths,
days_since_start_cases = date - start_cases,
days_since_start_deaths_pm = date - start_deaths_pm,
days_since_start_cases_pm = date - start_cases_pm)
ggplot(data = joined %>% filter(days_since_start_deaths_pm >= 0),
aes(x = days_since_start_deaths_pm,
y = deaths_pm,
group = ccaa)) +
geom_line(aes(color = country),
alpha = 0.8,
size = 2) +
scale_y_log10() +
scale_color_manual(name = '',
values = c('darkorange', 'purple')) +
theme_simple() +
theme(legend.position = 'none') +
labs(x = 'Days since "start out outbreak"',
y = 'Deaths per million',
title = 'Deaths per capita, Italian regions vs. Spanish autonomous communities',
subtitle = paste0('Day 0 ("start of outbreak") = first day at ', x_deaths_pm, ' or greater cumulative deaths per million'),
caption = '@joethebrew | www.databrew.cc') +
geom_text(data = joined %>% group_by(ccaa) %>% filter(date == max(date) &
(
(country == 'Spain' & deaths_pm > 25) |
(country == 'Italy' & days_since_start_deaths_pm > 10)
)),
aes(x = days_since_start_deaths_pm + 0.6,
y = deaths_pm,
label = gsub(' ', '\n', ccaa),
color = country),
size = 6) +
theme(axis.text = element_text(size = 14),
axis.title = element_text(size = 20)) +
xlim(0, 23)
ggsave('/tmp/italy_comparison.png',
height = 6,
width = 10)
# Separate for Catalonia
pd <- joined %>% filter(days_since_start_deaths_pm >= 0) %>%
mutate(country = ifelse(ccaa == 'Cataluña',
'Catalonia',
country)) %>%
mutate(ccaa = ifelse(ccaa == 'Cataluña', 'Catalunya', ccaa))
pdcat <- pd %>% filter(country == 'Catalonia')
label_data <- pd %>% group_by(ccaa) %>% filter(date == max(date) &
(
(country == 'Catalonia') |
(country == 'Spain' & deaths_pm > 25) |
(country == 'Italy' & days_since_start_deaths_pm > 10)
))
ggplot(data = pd,
aes(x = days_since_start_deaths_pm,
y = deaths_pm,
group = ccaa)) +
geom_line(aes(color = country),
alpha = 0.3,
size = 1.5) +
geom_line(data = pdcat,
aes(color = country),
alpha = 0.8,
size = 2) +
geom_point(data = pdcat %>% filter(date == max(date)),
aes(color = country),
alpha = 0.8,
size = 4) +
scale_y_log10() +
scale_color_manual(name = '',
values = c('darkred', 'darkorange', "purple")) +
theme_simple() +
theme(legend.position = 'none') +
labs(x = 'Dies des del "començament del brot"',
y = 'Morts per milió',
title = 'Morts per càpita: Catalunya, comunitats autònomes, regions italianes',
subtitle = paste0('Dia 0 ("començament del brot") = primer dia a ', x_deaths_pm, ' o més morts acumulades per milió de població'),
caption = '@joethebrew | www.databrew.cc') +
geom_text(data = label_data,
aes(x = days_since_start_deaths_pm +0.2 ,
y = deaths_pm +3,
hjust = 0,
label = gsub(' ', '\n', ccaa),
color = country),
size = 6,
alpha = 0.7) +
theme(axis.text = element_text(size = 14),
axis.title = element_text(size = 20)) +
xlim(0, 24)
ggsave('/tmp/cat_italy_comparison.png',
height = 6,
width = 10)
# Straightforward Lombardy, Madrid, Cat comparison
specials <- c('Lombardia', 'Madrid')
pd <- joined %>% filter(days_since_start_deaths_pm >= 0) %>%
mutate(country = ifelse(ccaa == 'Cataluña',
'Catalonia',
country)) %>%
mutate(ccaa = ifelse(ccaa == 'Cataluña', 'Catalunya', ccaa))
pdcat <- pd %>% filter(ccaa %in% specials)
label_data <- pd %>% group_by(ccaa) %>% filter(date == max(date) &
(
# (country == 'Catalonia') |
(country == 'Spain' & deaths_pm > 20) |
(country == 'Italy' & days_since_start_deaths_pm >= 10)
))
ggplot(data = pd,
aes(x = days_since_start_deaths_pm,
y = deaths_pm,
group = ccaa)) +
geom_line(aes(color = country),
alpha = 0.3,
size = 1.5) +
geom_line(data = pdcat,
aes(color = country),
alpha = 0.8,
size = 2) +
scale_y_log10() +
scale_color_manual(name = '',
values = c('darkred', 'darkorange', "purple")) +
theme_simple() +
theme(legend.position = 'none') +
labs(x = 'Dias desde "el comienzo del brote"',
y = 'Muertes por millón de habitantes',
title = 'Muertes acumuladas por 1.000.000 habitantes',
subtitle = paste0('Dia 0 ("comienzo del brote") = primer día a ', x_deaths_pm, ' o más muertes acumuladas por milión de población'),
caption = '@joethebrew | www.databrew.cc') +
geom_text(data = label_data %>% filter(!ccaa %in% specials),
aes(x = days_since_start_deaths_pm + 0.4,
y = deaths_pm +3,
label = gsub(' ', '\n', ccaa),
color = country),
size = 5,
alpha = 0.5) +
geom_text(data = label_data %>% filter(ccaa %in% specials),
aes(x = days_since_start_deaths_pm ,
y = deaths_pm +30,
label = gsub(' ', '\n', ccaa),
color = country),
size = 8,
alpha = 0.8) +
theme(axis.text = element_text(size = 14),
axis.title = element_text(size = 20)) +
xlim(0, 23)
ggsave('/tmp/mad_lom_italy_comparison.png',
height = 6,
width = 10)
isos <- sort(unique(world_pop$sub_region))
isos <- c('Central Asia', 'Eastern Asia', 'Eastern Europe',
'Latin America and the Caribbean',
'Northern Africa', 'Northern America',
'Nothern Europe',
'South-eastern Asia',
'Southern Asia', 'Southern Europe',
'Sub-Saharan Africa', 'Western Asia', 'Western Europe')
dir.create('/tmp/world')
for(i in 1:length(isos)){
this_iso <- isos[i]
message(i, ' ', this_iso)
countries <- world_pop %>% filter(sub_region == this_iso)
pd <- df %>%
filter(!country %in% c('Guyan
a', 'Bahamas', 'The Bahamas')) %>%
group_by(country, iso, date) %>%
summarise(cases = sum(cases, na.rm = TRUE)) %>%
ungroup %>%
group_by(country) %>%
filter(length(which(cases > 0)) > 1) %>%
ungroup %>%
filter(iso %in% countries$iso)
if(nrow(pd) > 0){
cols <- colorRampPalette(brewer.pal(n = 8, 'Spectral'))(length(unique(pd$country)))
cols <- sample(cols, length(cols))
# Plot
n_countries <- (length(unique(pd$country)))
ggplot(data = pd,
aes(x = date,
# color = country,
# fill = country,
y = cases)) +
theme_simple() +
# geom_point() +
# geom_line() +
geom_area(fill = 'darkred', alpha = 0.3, color = 'darkred') +
# scale_color_manual(name = '',
# values = cols) +
# scale_fill_manual(name = '',
# values = cols) +
theme(legend.position = 'none',
axis.text = element_text(size = 6),
strip.text = element_text(size = ifelse(n_countries > 20, 6,
ifelse(n_countries > 10, 10,
ifelse(n_countries > 5, 11, 12))) ),
legend.text = element_text(size = 6)) +
# scale_y_log10() +
facet_wrap(~country,
scales = 'free') +
labs(x = '',
y = 'Confirmed cases',
title = paste0('Confirmed cases of COVID-19 in ', this_iso))
ggsave(paste0('/tmp/world/', this_iso, '.png'),
width = 12,
height = 7)
}
}
plot_data <- df_country %>% filter(country %in% c('Spain', 'France', 'Italy', 'Germany', 'Belgium', 'Norway')) %>% mutate(geo = country)
roll_curve(plot_data, pop = T)
dir.create('/tmp/countries')
roll_curve_country <- function(the_country = 'Spain'){
plot_data <- df_country %>% filter(country %in% the_country) %>% mutate(geo = country)
g1 <- roll_curve(plot_data, pop = F)
g2 <- roll_curve(plot_data, pop = T)
g3 <- roll_curve(plot_data, pop = F, deaths = T)
g4 <- roll_curve(plot_data, pop = T, deaths = T)
ggsave(paste0('/tmp/countries/', the_country, '1.png'), g1)
ggsave(paste0('/tmp/countries/', the_country, '2.png'), g2)
ggsave(paste0('/tmp/countries/', the_country, '3.png'), g3)
ggsave(paste0('/tmp/countries/', the_country, '4.png'), g4)
}
countries <- c('Spain', 'France', 'Italy', 'Germany', 'Belgium', 'Norway', 'US', 'United Kingdom', 'Korea, South',
'China', 'Japan', 'Switzerland', 'Sweden', 'Denmark', 'Netherlands', 'Iran', 'Canada')
for(i in 1:length(countries)){
roll_curve_country(the_country = countries[i])
}
Error in ts(x): 'ts' object must have one or more observations
# Cases
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data)
# Cases adjusted
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data, pop = T)
# Deaths
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data, deaths = T)
# Cases adjusted
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data, pop = T, deaths = T)
plot_data <- esp_df %>% mutate(geo = ccaa)
roll_curve(plot_data, pop = T, deaths = T)
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data, deaths = T)
# Latest in Spain
pd <- esp_df %>%
filter(date == max(date)) %>%
mutate(p = deaths / sum(deaths) * 100)
text_size <- 12
# deaths
ggplot(data = pd,
aes(x = ccaa,
y = deaths)) +
geom_bar(stat = 'identity',
fill = 'black') +
theme_simple() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
labs(x = '',
y = 'Deaths | Muertes',
title = 'COVID-19 deaths in Spain',
subtitle = paste0('Data as of ', max(pd$date)),
caption = 'github.com/databrew/covid19 | joe@databrew.cc') +
theme(legend.position = 'top',
legend.text = element_text(size = text_size * 2),
axis.title = element_text(size = text_size * 2),
plot.title = element_text(size = text_size * 2.3),
axis.text.x = element_text(size = text_size * 1.5)) +
geom_text(data = pd %>% filter(deaths > 0),
aes(x = ccaa,
y = deaths,
label = paste0(deaths, '\n(',
round(p, digits = 1), '%)')),
size = text_size * 0.3,
nudge_y = 180) +
ylim(0, max(pd$deaths * 1.1))
Error: Aesthetics must be either length 1 or the same as the data (1): x and y
ggsave('/tmp/spain.png')
Error: Aesthetics must be either length 1 or the same as the data (1): x and y
Muertes relativas por CCAA
# Latest in Spain
pd <- esp_df %>%
filter(date == max(date)) %>%
mutate(p = deaths / sum(deaths) * 100)
pd <- pd %>% left_join(esp_pop)
text_size <- 12
pd$value <- pd$deaths / pd$pop * 100000
# deaths
ggplot(data = pd,
aes(x = ccaa,
y = value)) +
geom_bar(stat = 'identity',
fill = 'black') +
theme_simple() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
labs(x = '',
y = 'Deaths per 100,000',
title = 'COVID-19 deaths per 100.000',
subtitle = paste0('Data as of ', max(pd$date)),
caption = 'github.com/databrew/covid19 | joe@databrew.cc') +
theme(legend.position = 'top',
legend.text = element_text(size = text_size * 2),
axis.title = element_text(size = text_size * 2),
plot.title = element_text(size = text_size * 2.3),
axis.text.x = element_text(size = text_size * 1.5)) +
geom_text(data = pd %>% filter(value > 0),
aes(x = ccaa,
y = value,
label = paste0(round(value, digits = 2), '\n(',
deaths, '\ndeaths)')),
size = text_size * 0.3,
nudge_y = 4.5) +
ylim(0, max(pd$value) * 1.2)
Error: Aesthetics must be either length 1 or the same as the data (1): x and y
ggsave('/tmp/spai2.png')
Error: Aesthetics must be either length 1 or the same as the data (1): x and y
# Latest in Spain
pd <- esp_df %>%
filter(date == max(date)) %>%
mutate(p = deaths_non_cum / sum(deaths_non_cum) * 100)
text_size <- 12
# deaths
ggplot(data = pd,
aes(x = ccaa,
y = deaths_non_cum)) +
geom_bar(stat = 'identity',
fill = 'black') +
theme_simple() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
labs(x = '',
y = 'Deaths',
title = 'COVID-19 deaths in Spain',
subtitle = paste0('Data only for ', max(pd$date)),
caption = 'github.com/databrew/covid19 | joe@databrew.cc') +
theme(legend.position = 'top',
legend.text = element_text(size = text_size * 2),
axis.title = element_text(size = text_size * 2),
plot.title = element_text(size = text_size * 2.3),
axis.text.x = element_text(size = text_size * 1.5)) +
geom_text(data = pd %>% filter(deaths_non_cum > 0),
aes(x = ccaa,
y = deaths_non_cum,
label = paste0(deaths_non_cum, '\n(',
round(p, digits = 1), '%)')),
size = text_size * 0.3,
nudge_y = 30) +
ylim(0, max(pd$deaths_non_cum * 1.1))
Error: Aesthetics must be either length 1 or the same as the data (1): x and y
ggsave('/tmp/spain_non_cum.png')
Error: Aesthetics must be either length 1 or the same as the data (1): x and y
Muertes relativas por CCAA ayer SOLO
# Latest in Spain
pd <- esp_df %>%
filter(date == max(date)) %>%
mutate(p = deaths_non_cum / sum(deaths_non_cum) * 100)
pd <- pd %>% left_join(esp_pop)
text_size <- 12
pd$value <- pd$deaths_non_cum / pd$pop * 100000
# deaths
ggplot(data = pd,
aes(x = ccaa,
y = value)) +
geom_bar(stat = 'identity',
fill = 'black') +
theme_simple() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
labs(x = '',
y = 'Deaths per 100,000',
title = 'COVID-19 deaths per 100.000',
subtitle = paste0('Data as of ', max(pd$date)),
caption = 'github.com/databrew/covid19 | joe@databrew.cc') +
theme(legend.position = 'top',
legend.text = element_text(size = text_size * 2),
axis.title = element_text(size = text_size * 2),
plot.title = element_text(size = text_size * 2.3),
axis.text.x = element_text(size = text_size * 1.5)) +
geom_text(data = pd %>% filter(value > 0),
aes(x = ccaa,
y = value,
label = paste0(round(value, digits = 2), '\n(',
deaths_non_cum, '\ndeaths)')),
size = text_size * 0.3,
nudge_y = 1) +
ylim(0, max(pd$value) * 1.3)
Error: Aesthetics must be either length 1 or the same as the data (1): x and y
ggsave('/tmp/spain_ayer_adj.png')
Error: Aesthetics must be either length 1 or the same as the data (1): x and y
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla'))
roll_curve(plot_data, scales = 'fixed')
ggsave('/tmp/a.png',
width = 1280 / 150,
height = 720 / 150)
Loop for everywhere (see desktop)
dir.create('/tmp/ccaas')
ccaas <- sort(unique(esp_df$ccaa))
for(i in 1:length(ccaas)){
this_ccaa <- ccaas[i]
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
roll_curve(plot_data, scales = 'fixed') + theme(strip.text = element_text(size = 30))
ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_cases.png'),
width = 1280 / 150,
height = 720 / 150)
}
ccaas <- sort(unique(esp_df$ccaa))
for(i in 1:length(ccaas)){
this_ccaa <- ccaas[i]
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
roll_curve(plot_data, scales = 'fixed', pop = TRUE) + theme(strip.text = element_text(size = 30))
ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_cases_pop.png'),
width = 1280 / 150,
height = 720 / 150)
}
# Deaths too
for(i in 1:length(ccaas)){
this_ccaa <- ccaas[i]
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
roll_curve(plot_data, deaths = T, scales = 'fixed') + theme(strip.text = element_text(size = 30))
ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_deaths.png'),
width = 1280 / 150,
height = 720 / 150)
}
# Deaths too
for(i in 1:length(ccaas)){
this_ccaa <- ccaas[i]
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
roll_curve(plot_data, deaths = T, scales = 'fixed', pop = TRUE) + theme(strip.text = element_text(size = 30))
ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_deaths_pop.png'),
width = 1280 / 150,
height = 720 / 150)
}
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla'))
roll_curve(plot_data, scales = 'free_y')
ggsave('/tmp/b.png',
width = 1280 / 150,
height = 720 / 150)
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla'))
roll_curve(plot_data, deaths = T, scales = 'free_y')
ggsave('/tmp/c.png',
width = 1280 / 150,
height = 720 / 150)
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla'))
roll_curve(plot_data, deaths = T, scales = 'fixed')
ggsave('/tmp/d.png',
width = 1280 / 150,
height = 720 / 150)
plot_data <- df_country %>% filter(country %in% c('Spain', 'Italy', 'Germany', 'France', 'US',
'China', 'Korea, South', 'Japan', 'Singapore')) %>% mutate(geo = country)
roll_curve(plot_data, scales = 'free_y')
ggsave('/tmp/z.png',
width = 1280 / 150,
height = 720 / 150)
pd <- df_country %>%
group_by(date) %>%
summarise(n = sum(cases)) %>%
filter(date < max(date))
ggplot(data = pd,
aes(x = date,
y = n)) +
geom_point() +
theme_simple() +
labs(x = 'Date',
y = 'Cases',
title = 'COVID-19 cases')
ggsave('~/Videos/update/a.png',
width = 1280 / 150,
height = 720 / 150)
Error in grid.newpage(): could not open file '/home/joebrew/Videos/update/a.png'
pd <- df_country %>%
group_by(date,
country = ifelse(country == 'China', 'China', 'Other countries')) %>%
summarise(n = sum(cases)) %>%
ungroup %>%
filter(date < max(date))
Error: Column `country` can't be modified because it's a grouping variable
ggplot(data = pd,
aes(x = date,
y = n,
color = country)) +
geom_line(size = 2) +
# geom_point() +
theme_simple() +
labs(x = 'Date',
y = 'Cases',
title = 'COVID-19 cases') +
scale_color_manual(name = '',
values = c('red', 'black')) +
theme(legend.text = element_text(size = 25),
legend.position = 'top')
Error in FUN(X[[i]], ...): object 'country' not found
ggsave('~/Videos/update/b.png',
width = 1280 / 150,
height = 720 / 150)
Error in grid.newpage(): could not open file '/home/joebrew/Videos/update/b.png'
pd <- df_country %>%
group_by(date,
country = ifelse(country == 'China', 'China', 'Other countries')) %>%
summarise(n = sum(cases)) %>%
filter(country == 'Other countries') %>%
ungroup %>%
filter(date < max(date))
Error: Column `country` can't be modified because it's a grouping variable
ggplot(data = pd,
aes(x = date,
y = n)) +
geom_line(size = 2) +
# geom_point() +
theme_simple() +
labs(x = 'Date',
y = 'Cases',
title = 'COVID-19 cases, outside of China')
ggsave('~/Videos/update/c.png',
width = 1280 / 150,
height = 720 / 150)
Error in grid.newpage(): could not open file '/home/joebrew/Videos/update/c.png'
plot_day_zero(countries = c('France', 'Germany', 'Italy', 'Spain', 'Switzerland', 'Sweden', 'Norway', 'Netherlands'))
# ggsave('~/Videos/update/d.png',
# width = 1280 / 150,
# height = 720 / 150)
pd <- df_country %>%
group_by(date) %>%
summarise(n = sum(deaths)) %>%
filter(date < max(date))
ggplot(data = pd,
aes(x = date,
y = n)) +
geom_point() +
theme_simple() +
labs(x = 'Date',
y = 'Deaths',
title = 'COVID-19 deaths')
# ggsave('~/Videos/update/e.png',
# width = 1280 / 150,
# height = 720 / 150)
pd <- df_country %>%
group_by(date,
country = ifelse(country == 'China', 'China', 'Other countries')) %>%
summarise(n = sum(deaths)) %>%
ungroup %>%
filter(date < max(date))
Error: Column `country` can't be modified because it's a grouping variable
ggplot(data = pd,
aes(x = date,
y = n,
color = country)) +
geom_line(size = 2) +
# geom_point() +
theme_simple() +
labs(x = 'Date',
y = 'Deaths',
title = 'COVID-19 deaths') +
scale_color_manual(name = '',
values = c('red', 'black')) +
theme(legend.text = element_text(size = 25),
legend.position = 'top')
Error in FUN(X[[i]], ...): object 'country' not found
# ggsave('~/Videos/update/f.png',
# width = 1280 / 150,
# height = 720 / 150)
plot_day_zero(countries = c('Korea, South', 'Japan', 'China', 'Singapore'))
# ggsave('~/Videos/update/g.png',
# width = 1280 / 150,
# height = 720 / 150)
Since trajectories are very unstable when cases are low, we’ll exclude from our analysis the first few days, and will only count as “outbreak” once a country reaches 150 or more cumulative cases.
# Doubling time
n_cases_start = 150
countries = c('Italy', 'Spain', 'France', 'Germany', 'Italy', 'Switzerland', 'Denmark', 'US', 'United Kingdom', 'Norway')
# countries <- sort(unique(df_country$country))
out_list <- curve_list <- list()
counter <- 0
for(i in 1:length(countries)){
message(i)
this_country <- countries[i]
sub_data <-df_country %>% filter(country == this_country)
# Only calculate on countries with n_cases_start or greater cases,
# starting at the first day at n_cases_start or greater
ok <- max(sub_data$cases, na.rm = TRUE) >= n_cases_start
if(ok){
counter <- counter + 1
pd <- sub_data %>%
filter(!is.na(cases)) %>%
mutate(start_date = min(date[cases >= n_cases_start])) %>%
mutate(days_since = date - start_date) %>%
filter(days_since >= 0) %>%
mutate(days_since = as.numeric(days_since))
fit <- lm(log(cases) ~ days_since, data = pd)
# plot(pd$days_since, log(pd$cases))
# abline(fit)
## Slope
curve <- fit$coef[2]
# Predict days ahead
fake <- tibble(days_since = seq(0, max(pd$days_since) + 5, by = 1))
fake <- left_join(fake, pd %>% dplyr::select(days_since, cases, date))
fake$predicted <- exp(predict(fit, newdata = fake))
# Doubling time
dt <- log(2)/fit$coef[2]
out <- tibble(country = this_country,
doubling_time = dt,
slope = curve)
out_list[[counter]] <- out
curve_list[[counter]] <- fake %>% mutate(country = this_country)
}
}
done <- bind_rows(out_list)
print(done)
# A tibble: 10 x 3
country doubling_time slope
<chr> <dbl> <dbl>
1 Italy 7.60 0.0913
2 Spain 6.45 0.107
3 France 6.38 0.109
4 Germany 6.53 0.106
5 Italy 7.60 0.0913
6 Switzerland 8.77 0.0791
7 Denmark 11.7 0.0592
8 US 4.64 0.149
9 United Kingdom 5.50 0.126
10 Norway 13.1 0.0530
curves <- bind_rows(curve_list)
# Get curves back in exponential form
# curves$curve <- exp(curves$curve)
# Join doubling time to curves
joined <- left_join(curves, done)
# Get rid of Italy future (since it's the "leader")
joined <- joined %>%
filter(country != 'Italy' |
date <= (Sys.Date() -1))
# Make long format
long <- joined %>%
dplyr::select(date, days_since, country, cases, predicted, doubling_time) %>%
tidyr::gather(key, value, cases:predicted) %>%
mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))
The below chart shows the trajectories in terms of number of cases in Europe in red, and the predicted trajectories in black. The black line assumes that the doubling rate will stay constant.
cols <- c('red', 'black')
ggplot(data = long,
aes(x = days_since,
y = value,
lty = key,
color = key)) +
geom_line(data = long %>% filter(key != 'Confirmed cases'),
size = 1.2, alpha = 0.8) +
geom_point(data = long %>% filter(key == 'Confirmed cases')) +
geom_line(data = long %>% filter(key == 'Confirmed cases'),
size = 0.8) +
facet_wrap(~paste0(country, '\n',
'(doubling time: ',
round(doubling_time, digits = 1), ' days)'), scales = 'free') +
theme_simple() +
scale_linetype_manual(name ='',
values = c(1,2)) +
scale_color_manual(name = '',
values = cols) +
theme(legend.position = 'top') +
labs(x = 'Days since first day at >150 cumulative cases',
y = 'Cases',
title = 'COVID-19 CASES: ("predicted" assumes no change in doubling time)',
caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
subtitle = '(Doubling time calculated since first day at >150 cumulative cases)') +
theme(strip.text = element_text(size = 13),
plot.title = element_text(size = 15))
Since Italy is “leading the way”, it’s helpful to also compare each country to Italy. Let’s see that.
# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Italy')
ol2 <- joined %>% filter(country == 'Italy') %>% dplyr::rename(Italy = cases) %>%
dplyr::select(Italy, days_since)
ol <- left_join(ol1, ol2) %>%
dplyr::select(days_since, date, country, cases, predicted, Italy,doubling_time)
ol <- tidyr::gather(ol, key, value, cases: Italy) %>%
mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))
cols <- c('red', 'blue', 'black')
ggplot(data = ol,
aes(x = days_since,
y = value,
lty = key,
color = key)) +
geom_line(data = ol %>% filter(!key %in% c('Confirmed cases', 'Italy')),
size = 1.2, alpha = 0.8) +
geom_line(data = ol %>% filter(key %in% c('Italy')),
size = 0.8, alpha = 0.8) +
geom_point(data = ol %>% filter(key == 'Confirmed cases')) +
geom_line(data = ol %>% filter(key == 'Confirmed cases'),
size = 0.8) +
facet_wrap(~paste0(country, '\n',
'(doubling time: ',
round(doubling_time, digits = 1), ' days)'), scales = 'free') +
theme_simple() +
scale_linetype_manual(name ='',
values = c(1,6,2)) +
scale_color_manual(name = '',
values = cols) +
theme(legend.position = 'top') +
labs(x = 'Days since first day at >150 cumulative cases',
y = 'Cases',
title = 'COVID-19 CASES: ("predicted" assumes no change in doubling time)',
caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
subtitle = '(Doubling time calculated since first day at >150 cumulative cases)') +
theme(strip.text = element_text(size = 13),
plot.title = element_text(size = 15))
In the above, what’s striking is how many places have trajectories that are worse than Italy’s. Yes, Italy has more cases, but it’s doubling time is less. Either that changes soon, or these other countries will soon have more cases than Italy.
The number of cases is not necessarily the best indicator for the severity of an outbreak of this nature. Why? Because (a) testing rates and protocols are different by place and (b) testing rates are different by time (since health services are changing their approaches as things develop). In other words, when we compare the number of cases by place and time, we are introducing significant bias.
Using deaths to gauge the magnitude of the outbreak is also problematic. Death rates are differential by age, so the number of deaths depends on a country’s population period, or age structure. Also, death rates will be a function of health services, which are not of the same quality every where. And, of course, like cases, we don’t necessarily know about all of the deaths that occur because of COVID-19.
Still, there’s an argument that death rates have less bias than case rates because deaths are easier to identify than cases. Let’s accept that argument, for the time being, and have a look at death rates by country.
# Doubling time
n_deaths_start = 5
countries = c('Italy', 'Spain', 'France', 'Italy', 'Switzerland', 'Denmark', 'US', 'United Kingdom', 'Norway', 'Germany')
# countries <- sort(unique(df_country$country))
make_double_time <- function(data = df_country,
the_country = 'Spain',
n_deaths_start = 5,
time_ahead = 7){
sub_data <-data %>% filter(country == the_country)
# Only calculate on countries with n_cases_start or greater cases,
# starting at the first day at n_cases_start or greater
ok <- max(sub_data$deaths, na.rm = TRUE) >= n_deaths_start
if(ok){
counter <- counter + 1
pd <- sub_data %>%
filter(!is.na(deaths)) %>%
mutate(start_date = min(date[deaths >= n_deaths_start])) %>%
mutate(days_since = date - start_date) %>%
filter(days_since >= 0) %>%
mutate(days_since = as.numeric(days_since)) %>%
mutate(the_weight = 1/(1 + (as.numeric(max(date) - date))))
fit <- lm(log(deaths) ~ days_since,
weights = the_weight,
data = pd)
# fitlo <- loess(deaths ~ days_since, data = pd)
# plot(pd$days_since, log(pd$cases))
# abline(fit)
## Slope
# curve <- fit$coef[2]
# Predict days ahead
day0 <- pd$date[pd$days_since == 0]
fake <- tibble(days_since = seq(0, max(pd$days_since) + time_ahead, by = 1))
fake <- fake %>%mutate(date = seq(day0, day0+max(fake$days_since), by = 1))
fake <- left_join(fake, pd %>% dplyr::select(days_since, deaths, date))
fake$predicted <- exp(predict(fit, newdata = fake))
# fake$predictedlo <- predict(fitlo, newdata = fake)
ci <- exp(predict(fit, newdata = fake, interval = 'prediction'))
# cilo <- predict(fitlo, newdata = fake, interval = 'prediction')
fake$lwr <- ci[,'lwr']
fake$upr <- ci[,'upr']
# fake$lwrlo <- ci[,'lwr']
# fake$uprlo <- ci[,'upr']
# Doubling time
dt <- log(2)/fit$coef[2]
fake %>% mutate(country = the_country) %>% mutate(doubling_time = dt)
}
}
plot_double_time <- function(data, ylog = F){
the_labs <- labs(x = 'Date',
y = 'Deaths',
title = paste0('Predicted deaths in ', data$country[1]))
long <- data %>%
tidyr::gather(key, value, deaths:predicted) %>%
mutate(key = Hmisc::capitalize(key))
g <- ggplot() +
geom_ribbon(data = data %>% filter(date > max(long$date[!is.na(long$value) & long$key == 'Deaths'])),
aes(x = date,
ymax = upr,
ymin = lwr),
alpha =0.6,
fill = 'darkorange') +
geom_line(data = long,
aes(x = date,
y = value,
group = key,
lty = key)) +
geom_point(data = long %>% filter(key == 'Deaths'),
aes(x = date,
y = value)) +
theme_simple() +
theme(legend.position = 'right',
legend.title = element_blank()) +
the_labs
if(ylog){
g <- g + scale_y_log10()
}
return(g)
}
options(scipen = '999')
data <- make_double_time(n_deaths_start = 150, time_ahead = 7)
data
# A tibble: 55 x 8
days_since date country deaths predicted lwr upr doubling_time
<dbl> <date> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 2020-03-14 Spain 292 2029. 1322. 3113. 12.1
2 1 2020-03-15 Spain 314 2149. 1411. 3273. 12.1
3 2 2020-03-16 Spain 496 2276. 1505. 3442. 12.1
4 3 2020-03-17 Spain 590 2411. 1605. 3620. 12.1
5 4 2020-03-18 Spain 765 2553. 1712. 3807. 12.1
6 5 2020-03-19 Spain 993 2704. 1826. 4004. 12.1
7 6 2020-03-20 Spain 1326 2864. 1947. 4213. 12.1
8 7 2020-03-21 Spain 1672 3033. 2076. 4432. 12.1
9 8 2020-03-22 Spain 2136 3213. 2214. 4663. 12.1
10 9 2020-03-23 Spain 2707 3403. 2360. 4907. 12.1
# … with 45 more rows
dir.create('/tmp/ccaa_predictions')
plot_double_time(data, ylog = T) +
labs(subtitle = 'Basic log-linear model weighted at (1 + (1/ days ago)),\nassuming no change in growth trajectory since first day at >150 deaths')
ggsave('/tmp/ccaa_predictions/spain.png')
# All ccaas
ccaas <- sort(unique(esp_df$ccaa))
for(i in 1:length(ccaas)){
message(i)
this_ccaa <- ccaas[i]
sub_data <- esp_df %>% mutate(country = ccaa)
try({
data <- make_double_time(
data = sub_data,
the_country = this_ccaa,
n_deaths_start = 5,
time_ahead = 7)
plot_double_time(data, ylog = T) +
labs(subtitle = 'Basic log-linear model weighted at (1 + (1/ days ago)), assuming no change in growth trajectory since first day at >5 deaths')
ggsave(paste0('/tmp/ccaa_predictions/',
this_ccaa, '.png'),
height = 4.9,
width = 8.5)
})
}
Error in UseMethod("gather_") :
no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") :
no applicable method for 'gather_' applied to an object of class "NULL"
# all_countries <- sort(unique(df_country$country))
# for(i in 1:length(all_countries)){
# this_country <- all_countries[i]
# data <- make_double_time(the_country = this_country, n_deaths_start = 5)
# if(!is.null(data)){
# # print(this_country)
# g <- plot_double_time(data, ylog = F) +
# labs(subtitle = 'Basic log-linear model assuming no change in growth trajectory since first day at >5 deaths')
# ggsave(paste0('/tmp/', this_country, '.png'), height = 5, width = 8)
# print(data)
# }
# }
counter <- 0
# Africa
data <- make_double_time(the_country = 'South Africa',
n_deaths_start = 5, time_ahead = 7)
data
# A tibble: 38 x 8
days_since date country deaths predicted lwr upr doubling_time
<dbl> <date> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 2020-03-31 South Africa 5 8.09 6.48 10.1 7.80
2 1 2020-04-01 South Africa 5 8.84 7.12 11.0 7.80
3 2 2020-04-02 South Africa 5 9.66 7.83 11.9 7.80
4 3 2020-04-03 South Africa 9 10.6 8.61 13.0 7.80
5 4 2020-04-04 South Africa 9 11.5 9.46 14.1 7.80
6 5 2020-04-05 South Africa 11 12.6 10.4 15.3 7.80
7 6 2020-04-06 South Africa 12 13.8 11.4 16.6 7.80
8 7 2020-04-07 South Africa 13 15.1 12.5 18.1 7.80
9 8 2020-04-08 South Africa 18 16.5 13.8 19.7 7.80
10 9 2020-04-09 South Africa 18 18.0 15.1 21.4 7.80
# … with 28 more rows
dir.create('/tmp/africa_predictions')
plot_double_time(data, ylog = T) +
labs(subtitle = 'Basic log-linear model weighted at (1 + (1/ days ago)),\nassuming no change in growth trajectory since first day at >150 deaths')
out_list <- curve_list <- list()
counter <- 0
for(i in 1:length(countries)){
message(i)
this_country <- countries[i]
sub_data <-df_country %>% filter(country == this_country)
# Only calculate on countries with n_cases_start or greater cases,
# starting at the first day at n_cases_start or greater
ok <- max(sub_data$deaths, na.rm = TRUE) >= n_deaths_start
if(ok){
counter <- counter + 1
pd <- sub_data %>%
filter(!is.na(deaths)) %>%
mutate(start_date = min(date[deaths >= n_deaths_start])) %>%
mutate(days_since = date - start_date) %>%
filter(days_since >= 0) %>%
mutate(days_since = as.numeric(days_since))
fit <- lm(log(deaths) ~ days_since, data = pd)
# plot(pd$days_since, log(pd$cases))
# abline(fit)
## Slope
# curve <- fit$coef[2]
# Predict days ahead
fake <- tibble(days_since = seq(0, max(pd$days_since) + 5, by = 1))
fake <- left_join(fake, pd %>% dplyr::select(days_since, deaths, date))
fake$predicted <- exp(predict(fit, newdata = fake))
# Doubling time
dt <- log(2)/fit$coef[2]
out <- tibble(country = this_country,
doubling_time = dt)
out_list[[counter]] <- out
curve_list[[counter]] <- fake %>% mutate(country = this_country)
}
}
done <- bind_rows(out_list)
curves <- bind_rows(curve_list)
# Get curves back in exponential form
# curves$curve <- exp(curves$curve)
# Join doubling time to curves
joined <- left_join(curves, done)
# Get rid of Italy future (since it's the "leader")
joined <- joined %>%
filter(country != 'Italy' |
date <= (Sys.Date() -1))
# Make long format
long <- joined %>%
dplyr::select(date, days_since, country, deaths, predicted, doubling_time) %>%
tidyr::gather(key, value, deaths:predicted) %>%
mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))
cols <- c('red', 'black')
sub_data <- long %>% filter(country != 'US')
ggplot(data = sub_data,
aes(x = days_since,
y = value,
lty = key,
color = key)) +
geom_line(data = sub_data %>% filter(key != 'Deaths'),
size = 1.2, alpha = 0.8) +
geom_point(data = sub_data %>% filter(key == 'Deaths')) +
geom_line(data = sub_data %>% filter(key == 'Deaths'),
size = 0.8) +
facet_wrap(~paste0(country, '\n',
'(doubling time: ',
round(doubling_time, digits = 1), ' days)'), scales = 'free') +
theme_simple() +
scale_linetype_manual(name ='',
values = c(1,2)) +
scale_color_manual(name = '',
values = cols) +
theme(legend.position = 'top') +
labs(x = 'Days since first day at >5 cumulative deaths',
y = 'Deaths',
title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
theme(strip.text = element_text(size = 13),
plot.title = element_text(size = 15))
Let’s again overlay Italy.
# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Italy')
ol2 <- joined %>% filter(country == 'Italy') %>% dplyr::rename(Italy = deaths) %>%
dplyr::select(Italy, days_since)
ol <- left_join(ol1, ol2) %>%
dplyr::select(days_since, date, country, deaths, predicted, Italy,doubling_time)
ol <- tidyr::gather(ol, key, value, deaths: Italy) %>%
mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))
cols <- c('red', 'blue', 'black')
sub_data <- ol %>%
filter(!(key == 'Predicted (based on current doubling time)' &
country == 'Spain' &
days_since > 13))
ggplot(data = sub_data,
aes(x = days_since,
y = value,
lty = key,
color = key)) +
geom_line(data = sub_data %>% filter(!key %in% c('Deaths', 'Italy')),
size = 1.2, alpha = 0.8) +
geom_line(data = sub_data %>% filter(key %in% c('Italy')),
size = 0.8, alpha = 0.8) +
geom_point(data = sub_data %>% filter(key == 'Deaths')) +
geom_line(data = sub_data %>% filter(key == 'Deaths'),
size = 0.8) +
facet_wrap(~paste0(country, '\n',
'(doubling time: ',
round(doubling_time, digits = 1), ' days)'), scales = 'free') +
theme_simple() +
scale_linetype_manual(name ='',
values = c(1,6,2)) +
scale_color_manual(name = '',
values = cols) +
scale_y_log10() +
theme(legend.position = 'top') +
labs(x = 'Days since first day at >5 deaths',
y = 'Deaths',
title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
theme(strip.text = element_text(size = 13),
plot.title = element_text(size = 15))
Let’s look just at Spain
# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Italy',
country == 'Spain')
ol2 <- joined %>% filter(country == 'Italy') %>% dplyr::rename(Italy = deaths) %>%
dplyr::select(Italy, days_since)
ol <- left_join(ol1, ol2) %>%
dplyr::select(days_since, date, country, deaths, predicted, Italy,doubling_time)
ol <- tidyr::gather(ol, key, value, deaths: Italy) %>%
mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)',
ifelse(key == 'Deaths', 'Spain', key)))
cols <- c('blue', 'black', 'red')
ggplot(data = ol,
aes(x = days_since,
y = value,
lty = key,
color = key)) +
geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Italy')),
size = 1.2, alpha = 0.8) +
geom_line(data = ol %>% filter(key %in% c('Italy')),
size = 0.8, alpha = 0.8) +
# geom_point(data = ol %>% filter(key == 'Deaths')) +
geom_point(data = ol %>% filter(country == 'Spain',
key == 'Spain'), size = 4, alpha = 0.6) +
geom_line(data = ol %>% filter(key == 'Deaths'),
size = 0.8) +
# facet_wrap(~paste0(country, '\n',
# '(doubling time: ',
# round(doubling_time, digits = 1), ' days)'), scales = 'free') +
theme_simple() +
scale_linetype_manual(name ='',
values = c(1,6,1)) +
scale_color_manual(name = '',
values = cols) +
scale_y_log10() +
theme(legend.position = 'top') +
labs(x = 'Days since first day at >5 deaths',
y = 'Deaths',
title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
theme(strip.text = element_text(size = 13),
plot.title = element_text(size = 15),
axis.title = element_text(size = 18))
Things are changing very rapidly. And measures being taken by these countries will have an impact on the outbreak.
But it’s important to remember that there is a lag between when an intervention takes place and when its effect is notable. Because of the incubation period - the number of days between someone getting infected and becoming sick - what we do today won’t really have an effect until next weekend. And the clinical cases that present today are among people who got infected a week ago.
Disease control measures work. We can see that clearly in the case of Hubei, Wuhan, Iran, Japan. And they will work in Europe too. But because many of these measures were implemented very recently, we won’t likely see a major effect for at least a few more days.
In the mean time, it’s important to practice social distancing. Stay away from others to keep both you and others safe. Listen to Health Authorities. Take this very seriously.
# Madrid vs Lombardy deaths
n_death_start <- 5
pd <- esp_df %>%
# filter(ccaa == 'Madrid') %>%
dplyr::select(date, ccaa, cases, deaths) %>%
bind_rows(ita %>%
# filter(ccaa == 'Lombardia') %>%
dplyr::select(date, ccaa, cases, deaths)) %>%
arrange(date) %>%
group_by(ccaa) %>%
mutate(first_n_death = min(date[deaths >= n_death_start])) %>%
ungroup %>%
mutate(days_since_n_deaths = date - first_n_death) %>%
filter(is.finite(days_since_n_deaths))
pd$country <- pd$ccaa
pd$cases <- pd$cases
countries <- sort(unique(pd$country))
out_list <- curve_list <- list()
counter <- 0
for(i in 1:length(countries)){
message(i)
this_country <- countries[i]
sub_data <- pd %>% filter(country == this_country)
# Only calculate on countries with n_cases_start or greater cases,
# starting at the first day at n_cases_start or greater
# ok <- max(sub_data$deaths, na.rm = TRUE) >= n_deaths_start
ok <- length(which(sub_data$deaths >= n_deaths_start))
if(ok){
counter <- counter + 1
sub_pd <- sub_data %>%
filter(!is.na(deaths)) %>%
mutate(start_date = min(date[deaths >= n_deaths_start])) %>%
mutate(days_since = date - start_date) %>%
filter(days_since >= 0) %>%
mutate(days_since = as.numeric(days_since))
fit <- lm(log(deaths) ~ days_since, data = sub_pd)
# plot(pd$days_since, log(pd$cases))
# abline(fit)
## Slope
# curve <- fit$coef[2]
# Predict days ahead
fake <- tibble(days_since = seq(0, max(sub_pd$days_since) + 5, by = 1))
fake <- left_join(fake, sub_pd %>% dplyr::select(days_since, deaths, date))
fake$predicted <- exp(predict(fit, newdata = fake))
# Doubling time
dt <- log(2)/fit$coef[2]
out <- tibble(country = this_country,
doubling_time = dt)
out_list[[counter]] <- out
curve_list[[counter]] <- fake %>% mutate(country = this_country)
}
}
done <- bind_rows(out_list)
curves <- bind_rows(curve_list)
# Get curves back in exponential form
# curves$curve <- exp(curves$curve)
# Join doubling time to curves
joined <- left_join(curves, done)
# Make long format
long <- joined %>%
dplyr::select(date, days_since, country, deaths, predicted, doubling_time) %>%
tidyr::gather(key, value, deaths:predicted) %>%
mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))
# Remove those with not enough data to have a doubling time yet
long <- long %>% filter(!is.na(doubling_time))
text_size <- 12
cols <- c('red', 'black')
ggplot(data = long,
aes(x = days_since,
y = value,
lty = key,
color = key)) +
geom_line(data = long %>% filter(key != 'Deaths'),
size = 1.2, alpha = 0.8) +
geom_point(data = long %>% filter(key == 'Deaths')) +
geom_line(data = long %>% filter(key == 'Deaths'),
size = 0.8) +
facet_wrap(~paste0(country, '\n',
'(doubling time: ',
round(doubling_time, digits = 1), ' days)'), scales = 'free') +
theme_simple() +
scale_y_log10() +
scale_linetype_manual(name ='',
values = c(1,2)) +
scale_color_manual(name = '',
values = cols) +
theme(legend.position = 'top') +
labs(x = 'Days since first day at >150 cumulative cases',
y = 'Deaths',
title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
theme(strip.text = element_text(size = text_size * 0.5),
plot.title = element_text(size = 15))
Let’s overlay Lombardy
# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Lombardia')
ol2 <- joined %>% filter(country == 'Lombardia') %>% dplyr::rename(Lombardia = deaths) %>%
dplyr::select(Lombardia, days_since)
ol <- left_join(ol1, ol2) %>%
dplyr::select(days_since, date, country, deaths, predicted, Lombardia,doubling_time)
ol <- tidyr::gather(ol, key, value, deaths: Lombardia) %>%
mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))
# Remove those with not enough data to have a doubling time yet
ol <- ol %>% filter(!is.na(doubling_time))
cols <- c('red', 'blue', 'black')
ggplot(data = ol,
aes(x = days_since,
y = value,
lty = key,
color = key)) +
scale_y_log10() +
geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Italy')),
size = 1.2, alpha = 0.8) +
geom_line(data = ol %>% filter(key %in% c('Lombardia')),
size = 0.5, alpha = 0.8) +
geom_point(data = ol %>% filter(key == 'Deaths')) +
geom_line(data = ol %>% filter(key == 'Deaths'),
size = 0.8) +
facet_wrap(~paste0(country, '\n',
'(doubling time: ',
round(doubling_time, digits = 1), ' days)'), scales = 'free') +
theme_simple() +
scale_linetype_manual(name ='',
values = c(1,6,2)) +
scale_color_manual(name = '',
values = cols) +
theme(legend.position = 'top') +
labs(x = 'Days since first day at >5 deaths',
y = 'Deaths',
title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
theme(strip.text = element_text(size = text_size * 0.5),
plot.title = element_text(size = 15))
Show only Spanish regions vs. Lombardy
text_size <- 14
# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Lombardia')
ol2 <- joined %>% filter(country == 'Lombardia') %>% dplyr::rename(Lombardia = deaths) %>%
dplyr::select(Lombardia, days_since)
ol <- left_join(ol1, ol2) %>%
dplyr::select(days_since, date, country, deaths, predicted, Lombardia,doubling_time)
ol <- tidyr::gather(ol, key, value, deaths: Lombardia) %>%
mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))
# Remove those with not enough data to have a doubling time yet
ol <- ol %>% filter(!is.na(doubling_time))
# Only Spain
ol <- ol %>% filter(country %in% esp_df$ccaa) %>%
filter(!country %in% 'Aragón')
cols <- c('red', 'blue', 'black')
ggplot(data = ol,
aes(x = days_since,
y = value,
lty = key,
color = key)) +
scale_y_log10() +
geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Lombardia')),
size = 1.2, alpha = 0.8) +
geom_line(data = ol %>% filter(key %in% c('Lombardia')),
size = 0.5, alpha = 0.8) +
geom_point(data = ol %>% filter(key == 'Deaths')) +
geom_line(data = ol %>% filter(key == 'Deaths'),
size = 0.8) +
facet_wrap(~paste0(country, '\n',
'(doubling time: ',
round(doubling_time, digits = 1), ' days)'), scales = 'free') +
theme_simple() +
scale_linetype_manual(name ='',
values = c(1,6,2)) +
scale_color_manual(name = '',
values = cols) +
theme(legend.position = 'top') +
labs(x = 'Days since first day at >5 deaths',
y = 'Deaths',
title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
theme(strip.text = element_text(size = text_size * 0.6),
plot.title = element_text(size = 15))
Same plot but overlayed
Same as above, but overlaid
text_size <-10
# cols <- c('red', 'black')
long <- long %>% filter(country %in% c('Lombardia',
'Emilia Romagna') |
country %in% esp_df$ccaa) %>%
filter(country != 'Aragón')
places <- sort(unique(long$country))
cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 7, 'Spectral'))(length(places))
cols[which(places == 'Madrid')] <- 'red'
cols[which(places == 'Cataluña')] <- 'purple'
cols[which(places == 'Lombardia')] <- 'darkorange'
cols[which(places == 'Emilia Romagna')] <- 'darkgreen'
long$key <- ifelse(long$key != 'Deaths', 'Predicted', long$key)
long$key <- ifelse(long$key == 'Predicted', 'Muertes\nprevistas',
'Muertes\nobservadas')
# Keep only Madrid, Lombardy, Emilia Romagna
long <- long %>%
filter(country %in% c('Madrid',
'Lombardia',
'Emilia Romagna'))
ggplot(data = long,
aes(x = days_since,
y = value,
lty = key,
color = country)) +
geom_point(data = long %>% filter(key == 'Muertes\nobservadas'), size = 2, alpha = 0.8) +
geom_line(data = long %>% filter(key == 'Muertes\nprevistas'), size = 1, alpha = 0.7) +
geom_line(data = long %>% filter(key != 'Muertes\nprevistas'), size = 0.8) +
theme_simple() +
scale_y_log10() +
scale_linetype_manual(name ='',
values = c(1,4)) +
scale_color_manual(name = '',
values = cols) +
theme(legend.position = 'top') +
# labs(x = 'Days since first day at 5 or more cumulative deaths',
# y = 'Deaths',
# title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
# caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
# subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
labs(x = 'Días desde el primer día a 5 o más muertes acumuladas',
y = 'Muertes (escala logarítmica)',
title = 'Muertes por COVID-19',
caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
subtitle = '(Tasa de crecimiento calculada desde el primer día a 5 o más muertes acumuladas)\n(Muertes "previstas": suponiendo que no hay cambios en la tasa de crecimiento)') +
theme(strip.text = element_text(size = text_size * 0.75),
plot.title = element_text(size = text_size * 3),
legend.text = element_text(size = text_size * 1.5),
axis.title = element_text(size = text_size * 2),
axis.text = element_text(size = text_size * 2))
# cols <- c(cols, 'darkorange')
# ggplot(data = ol,
# aes(x = days_since,
# y = value,
# lty = key,
# color = key)) +
# scale_y_log10() +
# geom_line(aes(color = country)) +
#
# # geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Italy')),
# # size = 1.2, alpha = 0.8) +
# # geom_line(data = ol %>% filter(key %in% c('Lombardia')),
# # size = 0.5, alpha = 0.8) +
# # geom_point(data = ol %>% filter(key == 'Deaths')) +
# # geom_line(data = ol %>% filter(key == 'Deaths'),
# # size = 0.8) +
# theme_simple() +
# scale_linetype_manual(name ='',
# values = c(1,6,2)) +
# scale_color_manual(name = '',
# values = cols) +
# theme(legend.position = 'top') +
# labs(x = 'Days since first day at >5 deaths',
# y = 'Deaths',
# title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
# caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
# subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
# theme(strip.text = element_text(size = text_size * 1),
# plot.title = element_text(size = 15))
# Map data preparation
if(!'map.RData' %in% dir()){
esp1 <- getData(name = 'GADM', country = 'ESP', level = 1)
# Remove canary
esp1 <- esp1[esp1@data$NAME_1 != 'Islas Canarias',]
espf <- fortify(esp1, region = 'NAME_1')
# Standardize names
# Convert names
map_names <- esp1@data$NAME_1
data_names <- sort(unique(esp_df$ccaa))
names_df <- tibble(NAME_1 = c('Andalucía',
'Aragón',
'Cantabria',
'Castilla-La Mancha',
'Castilla y León',
'Cataluña',
'Ceuta y Melilla',
'Comunidad de Madrid',
'Comunidad Foral de Navarra',
'Comunidad Valenciana',
'Extremadura',
'Galicia',
'Islas Baleares',
'La Rioja',
'País Vasco',
'Principado de Asturias',
'Región de Murcia'),
ccaa = c('Andalucía',
'Aragón',
'Cantabria',
'CLM',
'CyL',
'Cataluña',
'Melilla',
'Madrid',
'Navarra',
'C. Valenciana',
'Extremadura',
'Galicia',
'Baleares',
'La Rioja',
'País Vasco',
'Asturias',
'Murcia'))
espf <- left_join(espf %>% dplyr::rename(NAME_1 = id), names_df)
centroids <- data.frame(coordinates(esp1))
names(centroids) <- c('long', 'lat')
centroids$NAME_1 <- esp1$NAME_1
centroids <- centroids %>% left_join(names_df)
# Get random sampling points
random_list <- list()
for(i in 1:nrow(esp1)){
message(i)
shp <- esp1[i,]
# bb <- bbox(shp)
this_ccaa <- esp1@data$NAME_1[i]
# xs <- runif(n = 500, min = bb[1,1], max = bb[1,2])
# ys <- runif(n = 500, min = bb[2,1], max = bb[2,2])
# random_points <- expand.grid(long = xs, lat = ys) %>%
# mutate(x = long,
# y = lat)
# coordinates(random_points) <- ~x+y
# proj4string(random_points) <- proj4string(shp)
# get ccaa
message('getting locations of randomly generated points')
# polys <- over(random_points,polygons(shp))
# polys <- as.numeric(polys)
random_points <- spsample(shp, n = 20000, type = 'random')
random_points <- data.frame(random_points)
random_points$NAME_1 <- this_ccaa
random_points <- left_join(random_points, names_df) %>% dplyr::select(-NAME_1)
random_list[[i]] <- random_points
}
random_points <- bind_rows(random_list)
random_points <- random_points %>% mutate(long = x,
lat = y)
save(espf,
esp1,
names_df,
centroids,
random_points,
file = 'map.RData')
} else {
load('map.RData')
}
# Define a function for adding zerio
add_zero <-
function (x, n)
{
x <- as.character(x)
adders <- n - nchar(x)
adders <- ifelse(adders < 0, 0, adders)
for (i in 1:length(x)) {
if (!is.na(x[i])) {
x[i] <- paste0(paste0(rep("0", adders[i]), collapse = ""),
x[i], collapse = "")
}
}
return(x)
}
remake_world_map <- FALSE
options(scipen = '999')
if(remake_world_map){
# World map animation
world <- map_data('world')
# world <- ne_countries(scale = "medium", returnclass = "sf")
# Get plotting data
pd <- df_country %>%
dplyr::select(date, lng, lat, n = cases)
dates <- sort(unique(pd$date))
n_days <- length(dates)
# # Define vectors for projection
# vec_lon <- seq(30, -20, length = n_days)
# vec_lat <- seq(25, 15, length = n_days)
dir.create('animation')
for(i in 1:n_days){
message(i, ' of ', n_days)
this_date <- dates[i]
# this_lon <- vec_lon[i]
# this_lat <- vec_lat[i]
# the_crs <-
# paste0("+proj=laea +lat_0=", this_lat,
# " +lon_0=",
# this_lon,
# " +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs ")
sub_data <- pd %>%
filter(date == this_date)
# coordinates(sub_data) <- ~lng+lat
# proj4string(sub_data) <- proj4string(esp1)
# # sub_data <- spTransform(sub_data,
# # the_crs)
# coordy <- coordinates(sub_data)
# sub_data@data$long <- coordy[,1]
# sub_data@data$lat <- coordy[,2]
g <- ggplot() +
geom_polygon(data = world,
aes(x = long,
y = lat,
group = group),
fill = 'black',
color = 'white',
size = 0.1) +
theme_map() +
geom_point(data = sub_data %>% filter(n > 0) %>% mutate(Deaths = n),
aes(x = lng,
y = lat,
size = Deaths),
color = 'red',
alpha = 0.6) +
geom_point(data = tibble(x = c(0,0), y = c(0,0), Deaths = c(1, 100000)),
aes(x = x,
y = y,
size = Deaths),
color = 'red',
alpha =0.001) +
scale_size_area(name = '', breaks = c(100, 1000, 10000, 100000),
max_size = 25
) +
# scale_size_area(name = '', limits = c(1, 10), breaks = c(0, 10, 30, 50, 70, 100, 200, 500)) +
labs(title = this_date) +
theme(plot.title = element_text(size = 30),
legend.text = element_text(size = 15),
legend.position = 'left')
plot_number <- add_zero(i, 3)
ggsave(filename = paste0('animation/', plot_number, '.png'),
plot = g,
width = 9.5,
height = 5.1)
}
setwd('animation')
system('convert -delay 30x100 -loop 0 *.png result.gif')
setwd('..')
}
make_map <- function(var = 'deaths',
data = NULL,
pop = FALSE,
pop_factor = 100000,
points = FALSE,
line_color = 'white',
add_names = T,
add_values = T,
text_size = 2.7){
if(is.null(data)){
data <- esp_df %>% mutate(ccaa = cat_transform(ccaa))
}
left <- espf %>% mutate(ccaa = cat_transform(ccaa))
right <- data[,c('ccaa', paste0(var, '_non_cum'))]
names(right)[ncol(right)] <- 'var'
right <- right %>% group_by(ccaa) %>% summarise(var = sum(var, na.rm = T))
if(pop){
right <- left_join(right, esp_pop)
right$var <- right$var / right$pop * pop_factor
}
map <- left_join(left, right)
if(points){
the_points <- centroids %>%
left_join(right)
g <- ggplot() +
geom_polygon(data = map,
aes(x = long,
y = lat,
group = group),
fill = 'black',
color = line_color,
lwd = 0.4, alpha = 0.8) +
geom_point(data = the_points,
aes(x = long,
y = lat,
size = var),
color = 'red',
alpha = 0.7) +
scale_size_area(name = '', max_size = 20)
} else {
# cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c')
cols <- RColorBrewer::brewer.pal(n = 8, name = 'Blues')
g <- ggplot(data = map,
aes(x = long,
y = lat,
group = group)) +
geom_polygon(aes(fill = var),
lwd = 0.3,
color = line_color) +
scale_fill_gradientn(name = '',
colours = cols)
# scale_fill_viridis(name = '' ,option = 'magma',
# direction = -1)
}
# Add names?
if(add_names){
centy <- centroids %>% left_join(right)
if(add_values){
centy$label <- paste0(centy$ccaa, '\n(', round(centy$var, digits = 2), ')')
} else {
centy$label <- centy$ccaa
}
g <- g +
geom_text(data = centy,
aes(x = long,
y = lat,
label = label,
group = ccaa),
alpha = 0.7,
size = text_size)
}
g +
theme_map() +
labs(subtitle = paste0('Data as of ', max(data$date))) +
theme(legend.position = 'right')
}
make_dot_map <- function(var = 'deaths',
date = NULL,
pop = FALSE,
pop_factor = 100,
point_factor = 1,
points = FALSE,
point_color = 'darkred',
point_size = 0.6,
point_alpha = 0.5){
if(is.null(date)){
the_date <- max(esp_df$date)
} else {
the_date <- date
}
right <- esp_df[esp_df$date == the_date,c('ccaa', var)]
names(right)[ncol(right)] <- 'var'
if(pop){
right <- left_join(right, esp_pop)
right$var <- right$var / right$pop * pop_factor
}
map_data <- esp1@data %>%
left_join(names_df) %>%
left_join(right)
map_data$var <- map_data$var / point_factor
out_list <- list()
for(i in 1:nrow(map_data)){
sub_data <- map_data[i,]
this_value = round(sub_data$var)
if(this_value >= 1){
this_ccaa = sub_data$ccaa
# get some points
sub_points <- random_points %>% filter(ccaa == this_ccaa)
sampled_points <- sub_points %>% dplyr::sample_n(this_value)
out_list[[i]] <- sampled_points
}
}
the_points <- bind_rows(out_list)
g <- ggplot() +
geom_polygon(data = espf,
aes(x = long,
y = lat,
group = group),
fill = 'white',
color = 'black',
lwd = 0.4, alpha = 0.8) +
geom_point(data = the_points,
aes(x = long,
y = lat),
color = point_color,
size = point_size,
alpha = point_alpha)
g +
theme_map() +
labs(subtitle = paste0('Data as of ', max(esp_df$date)))
}
make_map(var = 'deaths',
points = T) +
labs(title = 'Number of deaths',
caption = '@joethebrew')
make_map(var = 'deaths',
line_color = 'darkgrey',
points = F) +
labs(title = 'Number of deaths',
caption = '@joethebrew')
make_map(var = 'deaths', pop = TRUE, points = T) +
labs(title = 'Number of deaths per 100,000',
caption = '@joethebrew')
make_map(var = 'deaths', pop = TRUE, points = F, line_color = 'darkgrey') +
labs(title = 'Number of deaths per 100,000',
caption = '@joethebrew')
make_dot_map(var = 'deaths', point_size = 0.05) +
labs(title = 'COVID-19 deaths: 1 point = 1 death\nImportant: points are random within each CCAA; do not reflect exact location',
caption = '@joethebrew')
Error in if (this_value >= 1) {: missing value where TRUE/FALSE needed
make_map(var = 'cases',
points = T) +
labs(title = 'Number of confirmed cases',
caption = '@joethebrew')
make_map(var = 'cases',
line_color = 'darkgrey',
points = F) +
labs(title = 'Number of confirmed cases',
caption = '@joethebrew')
make_map(var = 'cases', pop = TRUE, points = T) +
labs(title = 'Number of confirmed cases per 100,000',
caption = '@joethebrew')
make_map(var = 'cases', pop = TRUE, points = F,
line_color = 'darkgrey') +
labs(title = 'Number of confirmed cases per 100,000',
caption = '@joethebrew')
make_dot_map(var = 'cases',
point_size = 0.05, point_alpha = 0.5, point_factor = 10) +
labs(title = 'COVID-19 cases: 1 point = 10 cases\nImportant: points are random within each CCAA; do not reflect exact location',
caption = '@joethebrew')
Error in if (this_value >= 1) {: missing value where TRUE/FALSE needed